From 1bf17c57eaac947dbed22d6404e3d384827feff9 Mon Sep 17 00:00:00 2001 From: Trevor Whitney Date: Tue, 12 Mar 2024 15:43:35 -0600 Subject: [PATCH 01/25] chore: fix backports to work with conventional commits (#12188) --- .github/workflows/backport.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/backport.yml b/.github/workflows/backport.yml index 820d2758307ef..3b6859b726b7d 100644 --- a/.github/workflows/backport.yml +++ b/.github/workflows/backport.yml @@ -23,4 +23,4 @@ jobs: metricsWriteAPIKey: ${{secrets.GRAFANA_MISC_STATS_API_KEY}} token: ${{secrets.GH_BOT_ACCESS_TOKEN}} labelsToAdd: "backport" - title: "[{{base}}] {{originalTitle}}" \ No newline at end of file + title: "chore: [{{base}}] {{originalTitle}}" From bd3f83a45dcd82c766afea26122b9fd4c4f5ac69 Mon Sep 17 00:00:00 2001 From: Paul Rogers <129207811+paul1r@users.noreply.github.com> Date: Tue, 12 Mar 2024 20:58:49 -0400 Subject: [PATCH 02/25] test: Add synchronization for fakeTailer responses (#12186) --- pkg/ingester/tailer_test.go | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/pkg/ingester/tailer_test.go b/pkg/ingester/tailer_test.go index 11de0d4daf82c..fa44cc0a7dcb8 100644 --- a/pkg/ingester/tailer_test.go +++ b/pkg/ingester/tailer_test.go @@ -149,10 +149,13 @@ func Test_dropstream(t *testing.T) { } type fakeTailServer struct { - responses []logproto.TailResponse + responses []logproto.TailResponse + responsesMu sync.Mutex } func (f *fakeTailServer) Send(response *logproto.TailResponse) error { + f.responsesMu.Lock() + defer f.responsesMu.Unlock() f.responses = append(f.responses, *response) return nil @@ -160,11 +163,38 @@ func (f *fakeTailServer) Send(response *logproto.TailResponse) error { func (f *fakeTailServer) Context() context.Context { return context.Background() } +func cloneTailResponse(response logproto.TailResponse) logproto.TailResponse { + var clone logproto.TailResponse + if response.Stream != nil { + clone.Stream = &logproto.Stream{} + clone.Stream.Labels = response.Stream.Labels + clone.Stream.Hash = response.Stream.Hash + if response.Stream.Entries != nil { + clone.Stream.Entries = make([]logproto.Entry, len(response.Stream.Entries)) + copy(clone.Stream.Entries, response.Stream.Entries) + } + } + if response.DroppedStreams != nil { + clone.DroppedStreams = make([]*logproto.DroppedStream, len(response.DroppedStreams)) + copy(clone.DroppedStreams, response.DroppedStreams) + } + + return clone +} + func (f *fakeTailServer) GetResponses() []logproto.TailResponse { + f.responsesMu.Lock() + defer f.responsesMu.Unlock() + clonedResponses := make([]logproto.TailResponse, len(f.responses)) + for i, resp := range f.responses { + clonedResponses[i] = cloneTailResponse(resp) + } return f.responses } func (f *fakeTailServer) Reset() { + f.responsesMu.Lock() + defer f.responsesMu.Unlock() f.responses = f.responses[:0] } From 8a8756aa7a4f2750b56b0a4681e42351759c980c Mon Sep 17 00:00:00 2001 From: Owen Diehl Date: Wed, 13 Mar 2024 00:14:16 -0700 Subject: [PATCH 03/25] chore(blooms): honors bloom_compactor_enabled for tenants (#12190) --- docs/sources/configure/_index.md | 11 +---------- pkg/bloomcompactor/bloomcompactor.go | 3 +++ pkg/bloomcompactor/bloomcompactor_test.go | 10 +--------- pkg/bloomcompactor/config.go | 2 -- pkg/validation/limits.go | 14 +------------- 5 files changed, 6 insertions(+), 34 deletions(-) diff --git a/docs/sources/configure/_index.md b/docs/sources/configure/_index.md index 8a0b9f4016122..244a2f011f38f 
100644 --- a/docs/sources/configure/_index.md +++ b/docs/sources/configure/_index.md @@ -3181,21 +3181,12 @@ shard_streams: # The shard size defines how many bloom compactors should be used by a tenant # when computing blooms. If it's set to 0, shuffle sharding is disabled. # CLI flag: -bloom-compactor.shard-size -[bloom_compactor_shard_size: | default = 1] - -# The maximum age of a table before it is compacted. Do not compact tables older -# than the the configured time. Default to 7 days. 0s means no limit. -# CLI flag: -bloom-compactor.max-table-age -[bloom_compactor_max_table_age: | default = 168h] +[bloom_compactor_shard_size: | default = 0] # Whether to compact chunks into bloom filters. # CLI flag: -bloom-compactor.enable-compaction [bloom_compactor_enable_compaction: | default = false] -# The batch size of the chunks the bloom-compactor downloads at once. -# CLI flag: -bloom-compactor.chunks-batch-size -[bloom_compactor_chunks_batch_size: | default = 100] - # Length of the n-grams created when computing blooms from log lines. # CLI flag: -bloom-compactor.ngram-length [bloom_ngram_length: | default = 4] diff --git a/pkg/bloomcompactor/bloomcompactor.go b/pkg/bloomcompactor/bloomcompactor.go index a36d7b1e52d0d..6707213ce64bb 100644 --- a/pkg/bloomcompactor/bloomcompactor.go +++ b/pkg/bloomcompactor/bloomcompactor.go @@ -187,6 +187,9 @@ func (c *Compactor) tenants(ctx context.Context, table config.DayTable) (*v1.Sli // ownsTenant returns the ownership range for the tenant, if the compactor owns the tenant, and an error. func (c *Compactor) ownsTenant(tenant string) ([]v1.FingerprintBounds, bool, error) { + if !c.limits.BloomCompactorEnabled(tenant) { + return nil, false, nil + } tenantRing, owned := c.sharding.OwnsTenant(tenant) if !owned { return nil, false, nil diff --git a/pkg/bloomcompactor/bloomcompactor_test.go b/pkg/bloomcompactor/bloomcompactor_test.go index 375fb7102046b..71d5b843ca04b 100644 --- a/pkg/bloomcompactor/bloomcompactor_test.go +++ b/pkg/bloomcompactor/bloomcompactor_test.go @@ -166,16 +166,8 @@ func (m mockLimits) BloomCompactorShardSize(_ string) int { return m.shardSize } -func (m mockLimits) BloomCompactorChunksBatchSize(_ string) int { - panic("implement me") -} - -func (m mockLimits) BloomCompactorMaxTableAge(_ string) time.Duration { - panic("implement me") -} - func (m mockLimits) BloomCompactorEnabled(_ string) bool { - panic("implement me") + return true } func (m mockLimits) BloomNGramLength(_ string) int { diff --git a/pkg/bloomcompactor/config.go b/pkg/bloomcompactor/config.go index c97a75476d47c..b887493c1a867 100644 --- a/pkg/bloomcompactor/config.go +++ b/pkg/bloomcompactor/config.go @@ -71,8 +71,6 @@ func (cfg *RingConfig) RegisterFlagsWithPrefix(flagsPrefix, storePrefix string, type Limits interface { downloads.Limits BloomCompactorShardSize(tenantID string) int - BloomCompactorChunksBatchSize(userID string) int - BloomCompactorMaxTableAge(tenantID string) time.Duration BloomCompactorEnabled(tenantID string) bool BloomNGramLength(tenantID string) int BloomNGramSkip(tenantID string) int diff --git a/pkg/validation/limits.go b/pkg/validation/limits.go index 088fbc9dc6fe8..906b4f5350943 100644 --- a/pkg/validation/limits.go +++ b/pkg/validation/limits.go @@ -192,9 +192,7 @@ type Limits struct { BloomGatewayEnabled bool `yaml:"bloom_gateway_enable_filtering" json:"bloom_gateway_enable_filtering"` BloomCompactorShardSize int `yaml:"bloom_compactor_shard_size" json:"bloom_compactor_shard_size"` - BloomCompactorMaxTableAge time.Duration 
`yaml:"bloom_compactor_max_table_age" json:"bloom_compactor_max_table_age"` BloomCompactorEnabled bool `yaml:"bloom_compactor_enable_compaction" json:"bloom_compactor_enable_compaction"` - BloomCompactorChunksBatchSize int `yaml:"bloom_compactor_chunks_batch_size" json:"bloom_compactor_chunks_batch_size"` BloomNGramLength int `yaml:"bloom_ngram_length" json:"bloom_ngram_length"` BloomNGramSkip int `yaml:"bloom_ngram_skip" json:"bloom_ngram_skip"` BloomFalsePositiveRate float64 `yaml:"bloom_false_positive_rate" json:"bloom_false_positive_rate"` @@ -332,10 +330,8 @@ func (l *Limits) RegisterFlags(f *flag.FlagSet) { f.IntVar(&l.BloomGatewayShardSize, "bloom-gateway.shard-size", 0, "The shard size defines how many bloom gateways should be used by a tenant for querying.") f.BoolVar(&l.BloomGatewayEnabled, "bloom-gateway.enable-filtering", false, "Whether to use the bloom gateway component in the read path to filter chunks.") - f.IntVar(&l.BloomCompactorShardSize, "bloom-compactor.shard-size", 1, "The shard size defines how many bloom compactors should be used by a tenant when computing blooms. If it's set to 0, shuffle sharding is disabled.") - f.DurationVar(&l.BloomCompactorMaxTableAge, "bloom-compactor.max-table-age", 7*24*time.Hour, "The maximum age of a table before it is compacted. Do not compact tables older than the the configured time. Default to 7 days. 0s means no limit.") + f.IntVar(&l.BloomCompactorShardSize, "bloom-compactor.shard-size", 0, "The shard size defines how many bloom compactors should be used by a tenant when computing blooms. If it's set to 0, shuffle sharding is disabled.") f.BoolVar(&l.BloomCompactorEnabled, "bloom-compactor.enable-compaction", false, "Whether to compact chunks into bloom filters.") - f.IntVar(&l.BloomCompactorChunksBatchSize, "bloom-compactor.chunks-batch-size", 100, "The batch size of the chunks the bloom-compactor downloads at once.") f.IntVar(&l.BloomNGramLength, "bloom-compactor.ngram-length", 4, "Length of the n-grams created when computing blooms from log lines.") f.IntVar(&l.BloomNGramSkip, "bloom-compactor.ngram-skip", 1, "Skip factor for the n-grams created when computing blooms from log lines.") f.Float64Var(&l.BloomFalsePositiveRate, "bloom-compactor.false-positive-rate", 0.01, "Scalable Bloom Filter desired false-positive rate.") @@ -888,18 +884,10 @@ func (o *Overrides) BloomGatewayEnabled(userID string) bool { return o.getOverridesForUser(userID).BloomGatewayEnabled } -func (o *Overrides) BloomCompactorChunksBatchSize(userID string) int { - return o.getOverridesForUser(userID).BloomCompactorChunksBatchSize -} - func (o *Overrides) BloomCompactorShardSize(userID string) int { return o.getOverridesForUser(userID).BloomCompactorShardSize } -func (o *Overrides) BloomCompactorMaxTableAge(userID string) time.Duration { - return o.getOverridesForUser(userID).BloomCompactorMaxTableAge -} - func (o *Overrides) BloomCompactorEnabled(userID string) bool { return o.getOverridesForUser(userID).BloomCompactorEnabled } From 25f15cb3262cbefad43c755e576ec3d4837c8165 Mon Sep 17 00:00:00 2001 From: Sandeep Sukhani Date: Wed, 13 Mar 2024 14:11:05 +0530 Subject: [PATCH 04/25] feat: add support for setting custom grpc interceptors for requests sent to index-gateway (#12193) --- .../gatewayclient/gateway_client.go | 24 ++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/pkg/storage/stores/shipper/indexshipper/gatewayclient/gateway_client.go b/pkg/storage/stores/shipper/indexshipper/gatewayclient/gateway_client.go index 
949627d29cff8..69553fc34b6c3 100644 --- a/pkg/storage/stores/shipper/indexshipper/gatewayclient/gateway_client.go +++ b/pkg/storage/stores/shipper/indexshipper/gatewayclient/gateway_client.go @@ -13,10 +13,13 @@ import ( "github.com/grafana/dskit/concurrency" "github.com/grafana/dskit/grpcclient" "github.com/grafana/dskit/instrument" + "github.com/grafana/dskit/middleware" "github.com/grafana/dskit/ring" "github.com/grafana/dskit/ring/client" "github.com/grafana/dskit/services" "github.com/grafana/dskit/tenant" + "github.com/grpc-ecosystem/grpc-opentracing/go/otgrpc" + "github.com/opentracing/opentracing-go" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" "google.golang.org/grpc" @@ -72,6 +75,9 @@ type IndexGatewayClientConfig struct { // LogGatewayRequests configures if requests sent to the gateway should be logged or not. // The log messages are of type debug and contain the address of the gateway and the relevant tenant. LogGatewayRequests bool `yaml:"log_gateway_requests"` + + GRPCUnaryClientInterceptors []grpc.UnaryClientInterceptor `yaml:"-"` + GRCPStreamClientInterceptors []grpc.StreamClientInterceptor `yaml:"-"` } // RegisterFlagsWithPrefix register client-specific flags with the given prefix. @@ -136,7 +142,7 @@ func NewGatewayClient(cfg IndexGatewayClientConfig, r prometheus.Registerer, lim done: make(chan struct{}), } - dialOpts, err := cfg.GRPCClientConfig.DialOption(grpcclient.Instrument(sgClient.storeGatewayClientRequestDuration)) + dialOpts, err := cfg.GRPCClientConfig.DialOption(instrumentation(cfg, sgClient.storeGatewayClientRequestDuration)) if err != nil { return nil, errors.Wrap(err, "index gateway grpc dial option") } @@ -458,3 +464,19 @@ func (b *grpcIter) RangeValue() []byte { func (b *grpcIter) Value() []byte { return b.Rows[b.i].Value } + +func instrumentation(cfg IndexGatewayClientConfig, clientRequestDuration *prometheus.HistogramVec) ([]grpc.UnaryClientInterceptor, []grpc.StreamClientInterceptor) { + var unaryInterceptors []grpc.UnaryClientInterceptor + unaryInterceptors = append(unaryInterceptors, cfg.GRPCUnaryClientInterceptors...) + unaryInterceptors = append(unaryInterceptors, otgrpc.OpenTracingClientInterceptor(opentracing.GlobalTracer())) + unaryInterceptors = append(unaryInterceptors, middleware.ClientUserHeaderInterceptor) + unaryInterceptors = append(unaryInterceptors, middleware.UnaryClientInstrumentInterceptor(clientRequestDuration)) + + var streamInterceptors []grpc.StreamClientInterceptor + streamInterceptors = append(streamInterceptors, cfg.GRCPStreamClientInterceptors...) 
+ streamInterceptors = append(streamInterceptors, otgrpc.OpenTracingStreamClientInterceptor(opentracing.GlobalTracer())) + streamInterceptors = append(streamInterceptors, middleware.StreamClientUserHeaderInterceptor) + streamInterceptors = append(streamInterceptors, middleware.StreamClientInstrumentInterceptor(clientRequestDuration)) + + return unaryInterceptors, streamInterceptors +} From 2659e3597c4380c5a67c053f32159ef9bc9df905 Mon Sep 17 00:00:00 2001 From: Sandeep Sukhani Date: Wed, 13 Mar 2024 14:11:12 +0530 Subject: [PATCH 05/25] fix: fix middlewares being applied for series request when querier is running as a standalone service (#12194) --- pkg/loki/modules.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pkg/loki/modules.go b/pkg/loki/modules.go index a3d9937734c83..e5ca22dd65344 100644 --- a/pkg/loki/modules.go +++ b/pkg/loki/modules.go @@ -415,6 +415,7 @@ func (t *Loki) initQuerier() (services.Service, error) { indexStatsHTTPMiddleware := querier.WrapQuerySpanAndTimeout("query.IndexStats", t.Overrides) volumeHTTPMiddleware := querier.WrapQuerySpanAndTimeout("query.VolumeInstant", t.Overrides) volumeRangeHTTPMiddleware := querier.WrapQuerySpanAndTimeout("query.VolumeRange", t.Overrides) + seriesHTTPMiddleware := querier.WrapQuerySpanAndTimeout("query.Series", t.Overrides) if t.supportIndexDeleteRequest() && t.Cfg.CompactorConfig.RetentionEnabled { toMerge = append( @@ -465,6 +466,7 @@ func (t *Loki) initQuerier() (services.Service, error) { indexStatsHTTPMiddleware = middleware.Merge(httpMiddleware, indexStatsHTTPMiddleware) volumeHTTPMiddleware = middleware.Merge(httpMiddleware, volumeHTTPMiddleware) volumeRangeHTTPMiddleware = middleware.Merge(httpMiddleware, volumeRangeHTTPMiddleware) + seriesHTTPMiddleware = middleware.Merge(httpMiddleware, seriesHTTPMiddleware) // First, register the internal querier handler with the external HTTP server router := t.Server.HTTP @@ -490,7 +492,7 @@ func (t *Loki) initQuerier() (services.Service, error) { router.Path("/loki/api/v1/labels").Methods("GET", "POST").Handler(labelsHTTPMiddleware.Wrap(httpHandler)) router.Path("/loki/api/v1/label/{name}/values").Methods("GET", "POST").Handler(labelsHTTPMiddleware.Wrap(httpHandler)) - router.Path("/loki/api/v1/series").Methods("GET", "POST").Handler(querier.WrapQuerySpanAndTimeout("query.Series", t.Overrides).Wrap(httpHandler)) + router.Path("/loki/api/v1/series").Methods("GET", "POST").Handler(seriesHTTPMiddleware.Wrap(httpHandler)) router.Path("/loki/api/v1/index/stats").Methods("GET", "POST").Handler(indexStatsHTTPMiddleware.Wrap(httpHandler)) router.Path("/loki/api/v1/index/volume").Methods("GET", "POST").Handler(volumeHTTPMiddleware.Wrap(httpHandler)) router.Path("/loki/api/v1/index/volume_range").Methods("GET", "POST").Handler(volumeRangeHTTPMiddleware.Wrap(httpHandler)) @@ -504,7 +506,7 @@ func (t *Loki) initQuerier() (services.Service, error) { router.Path("/api/prom/label").Methods("GET", "POST").Handler(labelsHTTPMiddleware.Wrap(httpHandler)) router.Path("/api/prom/label/{name}/values").Methods("GET", "POST").Handler(labelsHTTPMiddleware.Wrap(httpHandler)) - router.Path("/api/prom/series").Methods("GET", "POST").Handler(querier.WrapQuerySpanAndTimeout("query.Series", t.Overrides).Wrap(httpHandler)) + router.Path("/api/prom/series").Methods("GET", "POST").Handler(seriesHTTPMiddleware.Wrap(httpHandler)) } // We always want to register tail routes externally, tail requests are different from normal queries, they From 19f5f3fd443dc8759ecdd24e0d0d563889adb235 Mon Sep 17 
00:00:00 2001 From: Cyril Tovena Date: Wed, 13 Mar 2024 10:15:36 +0100 Subject: [PATCH 06/25] fix: Fixes compactor shutdown log (#12195) --- pkg/compactor/compactor.go | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/pkg/compactor/compactor.go b/pkg/compactor/compactor.go index 8e3fa52126929..f5062f2d6e33e 100644 --- a/pkg/compactor/compactor.go +++ b/pkg/compactor/compactor.go @@ -512,7 +512,7 @@ func (c *Compactor) runCompactions(ctx context.Context) { // do the initial compaction if err := c.RunCompaction(ctx, false); err != nil { - level.Error(util_log.Logger).Log("msg", "failed to run compaction", err) + level.Error(util_log.Logger).Log("msg", "failed to run compaction", "err", err) } c.wg.Add(1) @@ -526,7 +526,7 @@ func (c *Compactor) runCompactions(ctx context.Context) { select { case <-ticker.C: if err := c.RunCompaction(ctx, false); err != nil { - level.Error(util_log.Logger).Log("msg", "failed to run compaction", err) + level.Error(util_log.Logger).Log("msg", "failed to run compaction", "err", err) } case <-ctx.Done(): return @@ -539,7 +539,7 @@ func (c *Compactor) runCompactions(ctx context.Context) { go func() { defer c.wg.Done() if err := c.RunCompaction(ctx, true); err != nil { - level.Error(util_log.Logger).Log("msg", "failed to apply retention", err) + level.Error(util_log.Logger).Log("msg", "failed to apply retention", "err", err) } ticker := time.NewTicker(c.cfg.ApplyRetentionInterval) @@ -549,7 +549,7 @@ func (c *Compactor) runCompactions(ctx context.Context) { select { case <-ticker.C: if err := c.RunCompaction(ctx, true); err != nil { - level.Error(util_log.Logger).Log("msg", "failed to apply retention", err) + level.Error(util_log.Logger).Log("msg", "failed to apply retention", "err", err) } case <-ctx.Done(): return @@ -876,7 +876,6 @@ func SortTablesByRange(tables []string) { // less than if start time is after produces a most recent first sort order return tableRanges[tables[i]].Start.After(tableRanges[tables[j]].Start) }) - } func SchemaPeriodForTable(cfg config.SchemaConfig, tableName string) (config.PeriodConfig, bool) { From 8b9d8b0226e3474c10fc7d4587fcdf5ca7db369b Mon Sep 17 00:00:00 2001 From: Sandeep Sukhani Date: Wed, 13 Mar 2024 17:13:13 +0530 Subject: [PATCH 07/25] feat: Make list of otel blessed attributes configurable (#12180) --- cmd/loki/main.go | 2 + docs/sources/configure/_index.md | 8 ++- docs/sources/send-data/otel/_index.md | 5 +- pkg/distributor/distributor.go | 3 + pkg/loghttp/push/otlp.go | 10 +-- pkg/loghttp/push/otlp_config.go | 97 +++++++++++++-------------- pkg/loghttp/push/otlp_config_test.go | 16 +++-- pkg/loghttp/push/otlp_test.go | 16 ++--- pkg/loghttp/push/push.go | 2 +- pkg/validation/limits.go | 26 +++++-- 10 files changed, 105 insertions(+), 80 deletions(-) diff --git a/cmd/loki/main.go b/cmd/loki/main.go index 937a5c16fab80..20a5925acbb4a 100644 --- a/cmd/loki/main.go +++ b/cmd/loki/main.go @@ -43,6 +43,8 @@ func main() { os.Exit(1) } + // Set the global OTLP config which is needed in per tenant otlp config + config.LimitsConfig.SetGlobalOTLPConfig(config.Distributor.OTLPConfig) // This global is set to the config passed into the last call to `NewOverrides`. If we don't // call it atleast once, the defaults are set to an empty struct. // We call it with the flag values so that the config file unmarshalling only overrides the values set in the config. 
diff --git a/docs/sources/configure/_index.md b/docs/sources/configure/_index.md index 244a2f011f38f..18fcc83bb5929 100644 --- a/docs/sources/configure/_index.md +++ b/docs/sources/configure/_index.md @@ -551,6 +551,11 @@ write_failures_logging: # logged or not. Default: false. # CLI flag: -distributor.write-failures-logging.add-insights-label [add_insights_label: | default = false] + +otlp_config: + # List of default otlp resource attributes to be picked as index labels + # CLI flag: -distributor.otlp.default_resource_attributes_as_index_labels + [default_resource_attributes_as_index_labels: | default = [service.name service.namespace service.instance.id deployment.environment cloud.region cloud.availability_zone k8s.cluster.name k8s.namespace.name k8s.pod.name k8s.container.name container.name k8s.replicaset.name k8s.deployment.name k8s.statefulset.name k8s.daemonset.name k8s.cronjob.name k8s.job.name]] ``` ### querier @@ -3230,7 +3235,8 @@ otlp_config: # Configuration for resource attributes to store them as index labels or # Structured Metadata or drop them altogether resource_attributes: - # Configure whether to ignore the default list of resource attributes to be + # Configure whether to ignore the default list of resource attributes set in + # 'distributor.otlp.default_resource_attributes_as_index_labels' to be # stored as index labels and only use the given resource attributes config [ignore_defaults: | default = false] diff --git a/docs/sources/send-data/otel/_index.md b/docs/sources/send-data/otel/_index.md index 12f9cdd0e4af5..dac87a4fb5d56 100644 --- a/docs/sources/send-data/otel/_index.md +++ b/docs/sources/send-data/otel/_index.md @@ -124,8 +124,9 @@ otlp_config: # Configuration for Resource Attributes to store them as index labels or # Structured Metadata or drop them altogether resource_attributes: - # Configure whether to ignore the default list of Resource Attributes to be - # stored as Index Labels and only use the given Resource Attributes config + # Configure whether to ignore the default list of resource attributes set in + # 'distributor.otlp.default_resource_attributes_as_index_labels' to be + # stored as index labels and only use the given resource attributes config [ignore_defaults: ] [attributes_config: ] diff --git a/pkg/distributor/distributor.go b/pkg/distributor/distributor.go index e09be8f9a6378..0ea3df7b69799 100644 --- a/pkg/distributor/distributor.go +++ b/pkg/distributor/distributor.go @@ -74,10 +74,13 @@ type Config struct { // WriteFailuresLoggingCfg customizes write failures logging behavior. WriteFailuresLogging writefailures.Cfg `yaml:"write_failures_logging" doc:"description=Experimental. Customize the logging of write failures."` + + OTLPConfig push.GlobalOTLPConfig `yaml:"otlp_config"` } // RegisterFlags registers distributor-related flags. 
func (cfg *Config) RegisterFlags(fs *flag.FlagSet) { + cfg.OTLPConfig.RegisterFlags(fs) cfg.DistributorRing.RegisterFlags(fs) cfg.RateStore.RegisterFlagsWithPrefix("distributor.rate-store", fs) cfg.WriteFailuresLogging.RegisterFlagsWithPrefix("distributor.write-failures-logging", fs) diff --git a/pkg/loghttp/push/otlp.go b/pkg/loghttp/push/otlp.go index b5ba7e8a8528f..58a594b01221c 100644 --- a/pkg/loghttp/push/otlp.go +++ b/pkg/loghttp/push/otlp.go @@ -28,14 +28,6 @@ const ( attrServiceName = "service.name" ) -var blessedAttributesNormalized = make([]string, len(blessedAttributes)) - -func init() { - for i := range blessedAttributes { - blessedAttributesNormalized[i] = prometheustranslator.NormalizeLabel(blessedAttributes[i]) - } -} - func newPushStats() *Stats { return &Stats{ LogLinesBytes: map[time.Duration]int64{}, @@ -118,7 +110,7 @@ func otlpToLokiPushRequest(ld plog.Logs, userID string, tenantsRetention Tenants resAttrs.PutStr(attrServiceName, "unknown_service") } resourceAttributesAsStructuredMetadata := make(push.LabelsAdapter, 0, resAttrs.Len()) - streamLabels := make(model.LabelSet, len(blessedAttributesNormalized)) + streamLabels := make(model.LabelSet, 30) // we have a default labels limit of 30 so just initialize the map of same size resAttrs.Range(func(k string, v pcommon.Value) bool { action := otlpConfig.ActionForResourceAttribute(k) diff --git a/pkg/loghttp/push/otlp_config.go b/pkg/loghttp/push/otlp_config.go index 44c0e932f9c12..f71efe8bee7dd 100644 --- a/pkg/loghttp/push/otlp_config.go +++ b/pkg/loghttp/push/otlp_config.go @@ -1,31 +1,13 @@ package push import ( + "flag" "fmt" + "github.com/grafana/dskit/flagext" "github.com/prometheus/prometheus/model/relabel" ) -var blessedAttributes = []string{ - "service.name", - "service.namespace", - "service.instance.id", - "deployment.environment", - "cloud.region", - "cloud.availability_zone", - "k8s.cluster.name", - "k8s.namespace.name", - "k8s.pod.name", - "k8s.container.name", - "container.name", - "k8s.replicaset.name", - "k8s.deployment.name", - "k8s.statefulset.name", - "k8s.daemonset.name", - "k8s.cronjob.name", - "k8s.job.name", -} - // Action is the action to be performed on OTLP Resource Attribute. type Action string @@ -44,15 +26,17 @@ var ( errAttributesAndRegexBothSet = fmt.Errorf("only one of attributes or regex must be set") ) -var DefaultOTLPConfig = OTLPConfig{ - ResourceAttributes: ResourceAttributesConfig{ - AttributesConfig: []AttributesConfig{ - { - Action: IndexLabel, - Attributes: blessedAttributes, +func DefaultOTLPConfig(cfg GlobalOTLPConfig) OTLPConfig { + return OTLPConfig{ + ResourceAttributes: ResourceAttributesConfig{ + AttributesConfig: []AttributesConfig{ + { + Action: IndexLabel, + Attributes: cfg.DefaultOTLPResourceAttributesAsIndexLabels, + }, }, }, - }, + } } type OTLPConfig struct { @@ -61,14 +45,44 @@ type OTLPConfig struct { LogAttributes []AttributesConfig `yaml:"log_attributes,omitempty" doc:"description=Configuration for log attributes to store them as Structured Metadata or drop them altogether"` } -func (c *OTLPConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { - *c = DefaultOTLPConfig - type plain OTLPConfig - if err := unmarshal((*plain)(c)); err != nil { - return err +type GlobalOTLPConfig struct { + DefaultOTLPResourceAttributesAsIndexLabels []string `yaml:"default_resource_attributes_as_index_labels"` +} + +// RegisterFlags registers distributor-related flags. 
+func (cfg *GlobalOTLPConfig) RegisterFlags(fs *flag.FlagSet) { + cfg.DefaultOTLPResourceAttributesAsIndexLabels = []string{ + "service.name", + "service.namespace", + "service.instance.id", + "deployment.environment", + "cloud.region", + "cloud.availability_zone", + "k8s.cluster.name", + "k8s.namespace.name", + "k8s.pod.name", + "k8s.container.name", + "container.name", + "k8s.replicaset.name", + "k8s.deployment.name", + "k8s.statefulset.name", + "k8s.daemonset.name", + "k8s.cronjob.name", + "k8s.job.name", } + fs.Var((*flagext.StringSlice)(&cfg.DefaultOTLPResourceAttributesAsIndexLabels), "distributor.otlp.default_resource_attributes_as_index_labels", "List of default otlp resource attributes to be picked as index labels") +} - return nil +// ApplyGlobalOTLPConfig applies global otlp config, specifically DefaultOTLPResourceAttributesAsIndexLabels for the start. +func (c *OTLPConfig) ApplyGlobalOTLPConfig(config GlobalOTLPConfig) { + if !c.ResourceAttributes.IgnoreDefaults && len(config.DefaultOTLPResourceAttributesAsIndexLabels) != 0 { + c.ResourceAttributes.AttributesConfig = append([]AttributesConfig{ + { + Action: IndexLabel, + Attributes: config.DefaultOTLPResourceAttributesAsIndexLabels, + }, + }, c.ResourceAttributes.AttributesConfig...) + } } func (c *OTLPConfig) actionForAttribute(attribute string, cfgs []AttributesConfig) Action { @@ -146,21 +160,6 @@ func (c *AttributesConfig) UnmarshalYAML(unmarshal func(interface{}) error) erro } type ResourceAttributesConfig struct { - IgnoreDefaults bool `yaml:"ignore_defaults,omitempty" doc:"default=false|description=Configure whether to ignore the default list of resource attributes to be stored as index labels and only use the given resource attributes config"` + IgnoreDefaults bool `yaml:"ignore_defaults,omitempty" doc:"default=false|description=Configure whether to ignore the default list of resource attributes set in 'distributor.otlp.default_resource_attributes_as_index_labels' to be stored as index labels and only use the given resource attributes config"` AttributesConfig []AttributesConfig `yaml:"attributes_config,omitempty"` } - -func (c *ResourceAttributesConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { - type plain ResourceAttributesConfig - if err := unmarshal((*plain)(c)); err != nil { - return err - } - - if !c.IgnoreDefaults { - c.AttributesConfig = append([]AttributesConfig{ - DefaultOTLPConfig.ResourceAttributes.AttributesConfig[0], - }, c.AttributesConfig...) 
- } - - return nil -} diff --git a/pkg/loghttp/push/otlp_config_test.go b/pkg/loghttp/push/otlp_config_test.go index 5fa6251628507..de5b9b281a37c 100644 --- a/pkg/loghttp/push/otlp_config_test.go +++ b/pkg/loghttp/push/otlp_config_test.go @@ -3,11 +3,18 @@ package push import ( "testing" + "github.com/grafana/dskit/flagext" "github.com/prometheus/prometheus/model/relabel" "github.com/stretchr/testify/require" "gopkg.in/yaml.v2" ) +var defaultGlobalOTLPConfig = GlobalOTLPConfig{} + +func init() { + flagext.DefaultValues(&defaultGlobalOTLPConfig) +} + func TestUnmarshalOTLPConfig(t *testing.T) { for _, tc := range []struct { name string @@ -25,7 +32,7 @@ resource_attributes: expectedCfg: OTLPConfig{ ResourceAttributes: ResourceAttributesConfig{ AttributesConfig: []AttributesConfig{ - DefaultOTLPConfig.ResourceAttributes.AttributesConfig[0], + DefaultOTLPConfig(defaultGlobalOTLPConfig).ResourceAttributes.AttributesConfig[0], { Action: IndexLabel, Regex: relabel.MustNewRegexp("foo"), @@ -66,7 +73,7 @@ scope_attributes: AttributesConfig: []AttributesConfig{ { Action: IndexLabel, - Attributes: blessedAttributes, + Attributes: defaultGlobalOTLPConfig.DefaultOTLPResourceAttributesAsIndexLabels, }, }, }, @@ -96,7 +103,7 @@ log_attributes: expectedCfg: OTLPConfig{ ResourceAttributes: ResourceAttributesConfig{ AttributesConfig: []AttributesConfig{ - DefaultOTLPConfig.ResourceAttributes.AttributesConfig[0], + DefaultOTLPConfig(defaultGlobalOTLPConfig).ResourceAttributes.AttributesConfig[0], { Action: IndexLabel, Regex: relabel.MustNewRegexp("foo"), @@ -151,6 +158,7 @@ log_attributes: require.ErrorIs(t, err, tc.expectedErr) return } + cfg.ApplyGlobalOTLPConfig(defaultGlobalOTLPConfig) require.Equal(t, tc.expectedCfg, cfg) }) } @@ -171,7 +179,7 @@ func TestOTLPConfig(t *testing.T) { }{ { name: "default OTLPConfig", - otlpConfig: DefaultOTLPConfig, + otlpConfig: DefaultOTLPConfig(defaultGlobalOTLPConfig), resAttrs: []attrAndExpAction{ { attr: attrServiceName, diff --git a/pkg/loghttp/push/otlp_test.go b/pkg/loghttp/push/otlp_test.go index deeed7683e5c9..3f8e3e8a92ca7 100644 --- a/pkg/loghttp/push/otlp_test.go +++ b/pkg/loghttp/push/otlp_test.go @@ -34,7 +34,7 @@ func TestOTLPToLokiPushRequest(t *testing.T) { }, expectedPushRequest: logproto.PushRequest{}, expectedStats: *newPushStats(), - otlpConfig: DefaultOTLPConfig, + otlpConfig: DefaultOTLPConfig(defaultGlobalOTLPConfig), }, { name: "resource with no logs", @@ -45,11 +45,11 @@ func TestOTLPToLokiPushRequest(t *testing.T) { }, expectedPushRequest: logproto.PushRequest{}, expectedStats: *newPushStats(), - otlpConfig: DefaultOTLPConfig, + otlpConfig: DefaultOTLPConfig(defaultGlobalOTLPConfig), }, { name: "resource with a log entry", - otlpConfig: DefaultOTLPConfig, + otlpConfig: DefaultOTLPConfig(defaultGlobalOTLPConfig), generateLogs: func() plog.Logs { ld := plog.NewLogs() ld.ResourceLogs().AppendEmpty().Resource().Attributes().PutStr("service.name", "service-1") @@ -85,7 +85,7 @@ func TestOTLPToLokiPushRequest(t *testing.T) { }, { name: "no resource attributes defined", - otlpConfig: DefaultOTLPConfig, + otlpConfig: DefaultOTLPConfig(defaultGlobalOTLPConfig), generateLogs: func() plog.Logs { ld := plog.NewLogs() ld.ResourceLogs().AppendEmpty() @@ -121,7 +121,7 @@ func TestOTLPToLokiPushRequest(t *testing.T) { }, { name: "service.name not defined in resource attributes", - otlpConfig: DefaultOTLPConfig, + otlpConfig: DefaultOTLPConfig(defaultGlobalOTLPConfig), tracker: NewMockTracker(), generateLogs: func() plog.Logs { ld := plog.NewLogs() @@ -183,7 
+183,7 @@ func TestOTLPToLokiPushRequest(t *testing.T) { }, { name: "resource attributes and scope attributes stored as structured metadata", - otlpConfig: DefaultOTLPConfig, + otlpConfig: DefaultOTLPConfig(defaultGlobalOTLPConfig), generateLogs: func() plog.Logs { ld := plog.NewLogs() ld.ResourceLogs().AppendEmpty() @@ -258,7 +258,7 @@ func TestOTLPToLokiPushRequest(t *testing.T) { }, { name: "attributes with nested data", - otlpConfig: DefaultOTLPConfig, + otlpConfig: DefaultOTLPConfig(defaultGlobalOTLPConfig), generateLogs: func() plog.Logs { ld := plog.NewLogs() ld.ResourceLogs().AppendEmpty() @@ -573,7 +573,7 @@ func TestOTLPLogToPushEntry(t *testing.T) { }, } { t.Run(tc.name, func(t *testing.T) { - require.Equal(t, tc.expectedResp, otlpLogToPushEntry(tc.buildLogRecord(), DefaultOTLPConfig)) + require.Equal(t, tc.expectedResp, otlpLogToPushEntry(tc.buildLogRecord(), DefaultOTLPConfig(defaultGlobalOTLPConfig))) }) } diff --git a/pkg/loghttp/push/push.go b/pkg/loghttp/push/push.go index 0a3b444e95926..f2acc8717539a 100644 --- a/pkg/loghttp/push/push.go +++ b/pkg/loghttp/push/push.go @@ -66,7 +66,7 @@ type Limits interface { type EmptyLimits struct{} func (EmptyLimits) OTLPConfig(string) OTLPConfig { - return DefaultOTLPConfig + return DefaultOTLPConfig(GlobalOTLPConfig{}) } type RequestParser func(userID string, r *http.Request, tenantsRetention TenantsRetention, limits Limits, tracker UsageTracker) (*logproto.PushRequest, *Stats, error) diff --git a/pkg/validation/limits.go b/pkg/validation/limits.go index 906b4f5350943..2be257af59cb5 100644 --- a/pkg/validation/limits.go +++ b/pkg/validation/limits.go @@ -200,10 +200,11 @@ type Limits struct { BloomGatewayCacheKeyInterval time.Duration `yaml:"bloom_gateway_cache_key_interval" json:"bloom_gateway_cache_key_interval"` BloomCompactorMaxBlockSize flagext.ByteSize `yaml:"bloom_compactor_max_block_size" json:"bloom_compactor_max_block_size"` - AllowStructuredMetadata bool `yaml:"allow_structured_metadata,omitempty" json:"allow_structured_metadata,omitempty" doc:"description=Allow user to send structured metadata in push payload."` - MaxStructuredMetadataSize flagext.ByteSize `yaml:"max_structured_metadata_size" json:"max_structured_metadata_size" doc:"description=Maximum size accepted for structured metadata per log line."` - MaxStructuredMetadataEntriesCount int `yaml:"max_structured_metadata_entries_count" json:"max_structured_metadata_entries_count" doc:"description=Maximum number of structured metadata entries per log line."` - OTLPConfig push.OTLPConfig `yaml:"otlp_config" json:"otlp_config" doc:"description=OTLP log ingestion configurations"` + AllowStructuredMetadata bool `yaml:"allow_structured_metadata,omitempty" json:"allow_structured_metadata,omitempty" doc:"description=Allow user to send structured metadata in push payload."` + MaxStructuredMetadataSize flagext.ByteSize `yaml:"max_structured_metadata_size" json:"max_structured_metadata_size" doc:"description=Maximum size accepted for structured metadata per log line."` + MaxStructuredMetadataEntriesCount int `yaml:"max_structured_metadata_entries_count" json:"max_structured_metadata_entries_count" doc:"description=Maximum number of structured metadata entries per log line."` + OTLPConfig push.OTLPConfig `yaml:"otlp_config" json:"otlp_config" doc:"description=OTLP log ingestion configurations"` + GlobalOTLPConfig push.GlobalOTLPConfig `yaml:"-" json:"-"` } type StreamRetention struct { @@ -354,7 +355,12 @@ func (l *Limits) RegisterFlags(f *flag.FlagSet) { _ = 
l.MaxStructuredMetadataSize.Set(defaultMaxStructuredMetadataSize) f.Var(&l.MaxStructuredMetadataSize, "limits.max-structured-metadata-size", "Maximum size accepted for structured metadata per entry. Default: 64 kb. Any log line exceeding this limit will be discarded. There is no limit when unset or set to 0.") f.IntVar(&l.MaxStructuredMetadataEntriesCount, "limits.max-structured-metadata-entries-count", defaultMaxStructuredMetadataCount, "Maximum number of structured metadata entries per log line. Default: 128. Any log line exceeding this limit will be discarded. There is no limit when unset or set to 0.") - l.OTLPConfig = push.DefaultOTLPConfig +} + +// SetGlobalOTLPConfig set GlobalOTLPConfig which is used while unmarshaling per-tenant otlp config to use the default list of resource attributes picked as index labels. +func (l *Limits) SetGlobalOTLPConfig(cfg push.GlobalOTLPConfig) { + l.GlobalOTLPConfig = cfg + l.OTLPConfig = push.DefaultOTLPConfig(cfg) } // UnmarshalYAML implements the yaml.Unmarshaler interface. @@ -374,7 +380,15 @@ func (l *Limits) UnmarshalYAML(unmarshal func(interface{}) error) error { return errors.Wrap(err, "cloning limits (unmarshaling)") } } - return unmarshal((*plain)(l)) + if err := unmarshal((*plain)(l)); err != nil { + return err + } + + if defaultLimits != nil { + // apply relevant bits from global otlp config + l.OTLPConfig.ApplyGlobalOTLPConfig(defaultLimits.GlobalOTLPConfig) + } + return nil } // Validate validates that this limits config is valid. From 74e94cd1c58f00de11c6b1890fd6778f2b4ca632 Mon Sep 17 00:00:00 2001 From: Sandeep Sukhani Date: Thu, 14 Mar 2024 14:37:05 +0530 Subject: [PATCH 08/25] fix: avoid wiping out delete request list cache in queriers when compactor is down (#12210) --- .../deletion/delete_requests_client.go | 11 ++++++---- .../deletion/delete_requests_client_test.go | 21 +++++++++++++++++++ 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/pkg/compactor/deletion/delete_requests_client.go b/pkg/compactor/deletion/delete_requests_client.go index c77723dd78f38..62b6f509880e2 100644 --- a/pkg/compactor/deletion/delete_requests_client.go +++ b/pkg/compactor/deletion/delete_requests_client.go @@ -98,22 +98,23 @@ func (c *deleteRequestsClient) updateLoop() { for { select { case <-t.C: - c.updateCache() + if err := c.updateCache(); err != nil { + level.Error(log.Logger).Log("msg", "error reloading cached delete requests", "err", err) + } case <-c.stopChan: return } } } -func (c *deleteRequestsClient) updateCache() { +func (c *deleteRequestsClient) updateCache() error { userIDs := c.currentUserIDs() newCache := make(map[string][]DeleteRequest) for _, userID := range userIDs { deleteReq, err := c.compactorClient.GetAllDeleteRequestsForUser(context.Background(), userID) if err != nil { - level.Error(log.Logger).Log("msg", "error getting delete requests from the store", "err", err) - continue + return err } newCache[userID] = deleteReq } @@ -121,6 +122,8 @@ func (c *deleteRequestsClient) updateCache() { c.mu.Lock() defer c.mu.Unlock() c.cache = newCache + + return nil } func (c *deleteRequestsClient) currentUserIDs() []string { diff --git a/pkg/compactor/deletion/delete_requests_client_test.go b/pkg/compactor/deletion/delete_requests_client_test.go index 2ba95b568f070..44c68e26b9e93 100644 --- a/pkg/compactor/deletion/delete_requests_client_test.go +++ b/pkg/compactor/deletion/delete_requests_client_test.go @@ -2,6 +2,7 @@ package deletion import ( "context" + "fmt" "sync" "testing" "time" @@ -63,6 +64,14 @@ func 
TestGetCacheGenNumberForUser(t *testing.T) { require.Nil(t, err) require.Equal(t, "different", deleteRequests[0].RequestID) + // failure in compactor calls should not wipe the cache + compactorClient.SetErr(fmt.Errorf("fail compactor calls")) + time.Sleep(200 * time.Millisecond) + + deleteRequests, err = client.GetAllDeleteRequestsForUser(context.Background(), "userID") + require.Nil(t, err) + require.Equal(t, "different", deleteRequests[0].RequestID) + client.Stop() }) } @@ -71,6 +80,7 @@ type mockCompactorClient struct { mx sync.Mutex delRequests []DeleteRequest cacheGenNum string + err error } func (m *mockCompactorClient) SetDeleteRequests(d []DeleteRequest) { @@ -80,12 +90,19 @@ func (m *mockCompactorClient) SetDeleteRequests(d []DeleteRequest) { } func (m *mockCompactorClient) GetAllDeleteRequestsForUser(_ context.Context, _ string) ([]DeleteRequest, error) { + if m.err != nil { + return nil, m.err + } m.mx.Lock() defer m.mx.Unlock() return m.delRequests, nil } func (m *mockCompactorClient) GetCacheGenerationNumber(_ context.Context, _ string) (string, error) { + if m.err != nil { + return "", m.err + } + return m.cacheGenNum, nil } @@ -94,3 +111,7 @@ func (m *mockCompactorClient) Name() string { } func (m *mockCompactorClient) Stop() {} + +func (m *mockCompactorClient) SetErr(err error) { + m.err = err +} From 68775d10ec76f5f9c6aff10249b80957976bf022 Mon Sep 17 00:00:00 2001 From: Robert Jacob Date: Thu, 14 Mar 2024 15:30:30 +0100 Subject: [PATCH 09/25] fix(operator): Keep credentialMode in status when updating schemas (#12212) --- operator/CHANGELOG.md | 1 + operator/internal/status/storage.go | 5 +---- operator/internal/status/storage_test.go | 22 +++++++++++++--------- 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/operator/CHANGELOG.md b/operator/CHANGELOG.md index fc3e8e224855d..cb06ac3997bd4 100644 --- a/operator/CHANGELOG.md +++ b/operator/CHANGELOG.md @@ -1,5 +1,6 @@ ## Main +- [12212](https://github.com/grafana/loki/pull/12212) **xperimental**: Keep credentialMode in status when updating schemas - [12165](https://github.com/grafana/loki/pull/12165) **JoaoBraveCoding**: Change attribute value used for CCO-based credential mode - [12157](https://github.com/grafana/loki/pull/12157) **periklis**: Fix managed auth features annotation for community-openshift bundle - [12104](https://github.com/grafana/loki/pull/12104) **periklis**: Upgrade build and runtime dependencies diff --git a/operator/internal/status/storage.go b/operator/internal/status/storage.go index ca6dea060a14e..620b5d4eeb364 100644 --- a/operator/internal/status/storage.go +++ b/operator/internal/status/storage.go @@ -21,9 +21,6 @@ func SetStorageSchemaStatus(ctx context.Context, k k8s.Client, req ctrl.Request, return kverrors.Wrap(err, "failed to lookup lokistack", "name", req.NamespacedName) } - s.Status.Storage = lokiv1.LokiStackStorageStatus{ - Schemas: schemas, - } - + s.Status.Storage.Schemas = schemas return k.Status().Update(ctx, &s) } diff --git a/operator/internal/status/storage_test.go b/operator/internal/status/storage_test.go index 5e2c0b595d517..86ea6641b4739 100644 --- a/operator/internal/status/storage_test.go +++ b/operator/internal/status/storage_test.go @@ -66,6 +66,7 @@ func TestSetStorageSchemaStatus_WhenStorageStatusExists_OverwriteStorageStatus(t }, Status: lokiv1.LokiStackStatus{ Storage: lokiv1.LokiStackStorageStatus{ + CredentialMode: lokiv1.CredentialModeStatic, Schemas: []lokiv1.ObjectStorageSchema{ { Version: lokiv1.ObjectStorageSchemaV11, @@ -94,14 +95,17 @@ func 
TestSetStorageSchemaStatus_WhenStorageStatusExists_OverwriteStorageStatus(t }, } - expected := []lokiv1.ObjectStorageSchema{ - { - Version: lokiv1.ObjectStorageSchemaV11, - EffectiveDate: "2020-10-11", - }, - { - Version: lokiv1.ObjectStorageSchemaV12, - EffectiveDate: "2021-10-11", + expected := lokiv1.LokiStackStorageStatus{ + CredentialMode: lokiv1.CredentialModeStatic, + Schemas: []lokiv1.ObjectStorageSchema{ + { + Version: lokiv1.ObjectStorageSchemaV11, + EffectiveDate: "2020-10-11", + }, + { + Version: lokiv1.ObjectStorageSchemaV12, + EffectiveDate: "2021-10-11", + }, }, } @@ -115,7 +119,7 @@ func TestSetStorageSchemaStatus_WhenStorageStatusExists_OverwriteStorageStatus(t sw.UpdateStub = func(_ context.Context, obj client.Object, _ ...client.SubResourceUpdateOption) error { stack := obj.(*lokiv1.LokiStack) - require.Equal(t, expected, stack.Status.Storage.Schemas) + require.Equal(t, expected, stack.Status.Storage) return nil } From 88393ed6ac83af13f70571c661bddc26f66e526e Mon Sep 17 00:00:00 2001 From: Robert Jacob Date: Thu, 14 Mar 2024 15:50:36 +0100 Subject: [PATCH 10/25] fix(operator): Fix duplicate operator metrics due to ServiceMonitor selector (#12216) Co-authored-by: Periklis Tsirakidis --- operator/CHANGELOG.md | 1 + ...-operator-controller-manager-metrics-service_v1_service.yaml | 1 + ...metrics-monitor_monitoring.coreos.com_v1_servicemonitor.yaml | 1 + .../manifests/loki-operator.clusterserviceversion.yaml | 2 +- ...-operator-controller-manager-metrics-service_v1_service.yaml | 1 + .../manifests/loki-operator.clusterserviceversion.yaml | 2 +- ...-operator-controller-manager-metrics-service_v1_service.yaml | 1 + ...metrics-monitor_monitoring.coreos.com_v1_servicemonitor.yaml | 1 + .../manifests/loki-operator.clusterserviceversion.yaml | 2 +- operator/config/prometheus/monitor.yaml | 1 + operator/config/rbac/auth_proxy_service.yaml | 1 + 11 files changed, 11 insertions(+), 3 deletions(-) diff --git a/operator/CHANGELOG.md b/operator/CHANGELOG.md index cb06ac3997bd4..08c2a5a1c8f8a 100644 --- a/operator/CHANGELOG.md +++ b/operator/CHANGELOG.md @@ -1,5 +1,6 @@ ## Main +- [12216](https://github.com/grafana/loki/pull/12216) **xperimental**: Fix duplicate operator metrics due to ServiceMonitor selector - [12212](https://github.com/grafana/loki/pull/12212) **xperimental**: Keep credentialMode in status when updating schemas - [12165](https://github.com/grafana/loki/pull/12165) **JoaoBraveCoding**: Change attribute value used for CCO-based credential mode - [12157](https://github.com/grafana/loki/pull/12157) **periklis**: Fix managed auth features annotation for community-openshift bundle diff --git a/operator/bundle/community-openshift/manifests/loki-operator-controller-manager-metrics-service_v1_service.yaml b/operator/bundle/community-openshift/manifests/loki-operator-controller-manager-metrics-service_v1_service.yaml index 6f5c94cffea84..6d7591ee5b109 100644 --- a/operator/bundle/community-openshift/manifests/loki-operator-controller-manager-metrics-service_v1_service.yaml +++ b/operator/bundle/community-openshift/manifests/loki-operator-controller-manager-metrics-service_v1_service.yaml @@ -5,6 +5,7 @@ metadata: service.beta.openshift.io/serving-cert-secret-name: loki-operator-metrics creationTimestamp: null labels: + app.kubernetes.io/component: metrics app.kubernetes.io/instance: loki-operator-v0.5.0 app.kubernetes.io/managed-by: operator-lifecycle-manager app.kubernetes.io/name: loki-operator diff --git 
a/operator/bundle/community-openshift/manifests/loki-operator-metrics-monitor_monitoring.coreos.com_v1_servicemonitor.yaml b/operator/bundle/community-openshift/manifests/loki-operator-metrics-monitor_monitoring.coreos.com_v1_servicemonitor.yaml index 7f5c401e1c073..3f698d26b4761 100644 --- a/operator/bundle/community-openshift/manifests/loki-operator-metrics-monitor_monitoring.coreos.com_v1_servicemonitor.yaml +++ b/operator/bundle/community-openshift/manifests/loki-operator-metrics-monitor_monitoring.coreos.com_v1_servicemonitor.yaml @@ -22,4 +22,5 @@ spec: serverName: loki-operator-controller-manager-metrics-service.kubernetes-operators.svc selector: matchLabels: + app.kubernetes.io/component: metrics app.kubernetes.io/name: loki-operator diff --git a/operator/bundle/community-openshift/manifests/loki-operator.clusterserviceversion.yaml b/operator/bundle/community-openshift/manifests/loki-operator.clusterserviceversion.yaml index 01cf5ae69027c..282c59b85c8de 100644 --- a/operator/bundle/community-openshift/manifests/loki-operator.clusterserviceversion.yaml +++ b/operator/bundle/community-openshift/manifests/loki-operator.clusterserviceversion.yaml @@ -150,7 +150,7 @@ metadata: categories: OpenShift Optional, Logging & Tracing certified: "false" containerImage: docker.io/grafana/loki-operator:0.5.0 - createdAt: "2024-03-12T09:52:37Z" + createdAt: "2024-03-14T13:25:52Z" description: The Community Loki Operator provides Kubernetes native deployment and management of Loki and related logging components. features.operators.openshift.io/disconnected: "true" diff --git a/operator/bundle/community/manifests/loki-operator-controller-manager-metrics-service_v1_service.yaml b/operator/bundle/community/manifests/loki-operator-controller-manager-metrics-service_v1_service.yaml index eb601bf525c4d..a04b76e41efef 100644 --- a/operator/bundle/community/manifests/loki-operator-controller-manager-metrics-service_v1_service.yaml +++ b/operator/bundle/community/manifests/loki-operator-controller-manager-metrics-service_v1_service.yaml @@ -3,6 +3,7 @@ kind: Service metadata: creationTimestamp: null labels: + app.kubernetes.io/component: metrics app.kubernetes.io/instance: loki-operator-v0.5.0 app.kubernetes.io/managed-by: operator-lifecycle-manager app.kubernetes.io/name: loki-operator diff --git a/operator/bundle/community/manifests/loki-operator.clusterserviceversion.yaml b/operator/bundle/community/manifests/loki-operator.clusterserviceversion.yaml index 75d0690ffdfcd..70e8484817b19 100644 --- a/operator/bundle/community/manifests/loki-operator.clusterserviceversion.yaml +++ b/operator/bundle/community/manifests/loki-operator.clusterserviceversion.yaml @@ -150,7 +150,7 @@ metadata: categories: OpenShift Optional, Logging & Tracing certified: "false" containerImage: docker.io/grafana/loki-operator:0.5.0 - createdAt: "2024-03-12T09:52:36Z" + createdAt: "2024-03-14T13:25:49Z" description: The Community Loki Operator provides Kubernetes native deployment and management of Loki and related logging components. 
operators.operatorframework.io/builder: operator-sdk-unknown diff --git a/operator/bundle/openshift/manifests/loki-operator-controller-manager-metrics-service_v1_service.yaml b/operator/bundle/openshift/manifests/loki-operator-controller-manager-metrics-service_v1_service.yaml index aba4997c36ae9..b7a8a16b7c344 100644 --- a/operator/bundle/openshift/manifests/loki-operator-controller-manager-metrics-service_v1_service.yaml +++ b/operator/bundle/openshift/manifests/loki-operator-controller-manager-metrics-service_v1_service.yaml @@ -5,6 +5,7 @@ metadata: service.beta.openshift.io/serving-cert-secret-name: loki-operator-metrics creationTimestamp: null labels: + app.kubernetes.io/component: metrics app.kubernetes.io/instance: loki-operator-0.1.0 app.kubernetes.io/managed-by: operator-lifecycle-manager app.kubernetes.io/name: loki-operator diff --git a/operator/bundle/openshift/manifests/loki-operator-metrics-monitor_monitoring.coreos.com_v1_servicemonitor.yaml b/operator/bundle/openshift/manifests/loki-operator-metrics-monitor_monitoring.coreos.com_v1_servicemonitor.yaml index 83b6a0efa53ab..7c62cf0585190 100644 --- a/operator/bundle/openshift/manifests/loki-operator-metrics-monitor_monitoring.coreos.com_v1_servicemonitor.yaml +++ b/operator/bundle/openshift/manifests/loki-operator-metrics-monitor_monitoring.coreos.com_v1_servicemonitor.yaml @@ -22,4 +22,5 @@ spec: serverName: loki-operator-controller-manager-metrics-service.openshift-operators-redhat.svc selector: matchLabels: + app.kubernetes.io/component: metrics app.kubernetes.io/name: loki-operator diff --git a/operator/bundle/openshift/manifests/loki-operator.clusterserviceversion.yaml b/operator/bundle/openshift/manifests/loki-operator.clusterserviceversion.yaml index aafba3b3a6574..ee2dcb513fe3b 100644 --- a/operator/bundle/openshift/manifests/loki-operator.clusterserviceversion.yaml +++ b/operator/bundle/openshift/manifests/loki-operator.clusterserviceversion.yaml @@ -150,7 +150,7 @@ metadata: categories: OpenShift Optional, Logging & Tracing certified: "false" containerImage: quay.io/openshift-logging/loki-operator:0.1.0 - createdAt: "2024-03-12T09:52:39Z" + createdAt: "2024-03-14T13:25:55Z" description: | The Loki Operator for OCP provides a means for configuring and managing a Loki stack for cluster logging. 
## Prerequisites and Requirements diff --git a/operator/config/prometheus/monitor.yaml b/operator/config/prometheus/monitor.yaml index 9a4fabe3c5d33..545744c6d952a 100644 --- a/operator/config/prometheus/monitor.yaml +++ b/operator/config/prometheus/monitor.yaml @@ -10,3 +10,4 @@ spec: selector: matchLabels: app.kubernetes.io/name: loki-operator + app.kubernetes.io/component: metrics diff --git a/operator/config/rbac/auth_proxy_service.yaml b/operator/config/rbac/auth_proxy_service.yaml index 7a0b8d71dfa71..3470650c0e4c7 100644 --- a/operator/config/rbac/auth_proxy_service.yaml +++ b/operator/config/rbac/auth_proxy_service.yaml @@ -2,6 +2,7 @@ apiVersion: v1 kind: Service metadata: labels: + app.kubernetes.io/component: metrics name: controller-manager-metrics-service spec: ports: From 1331dc5bdfb7f0d6830031721d8286418bc5ac47 Mon Sep 17 00:00:00 2001 From: Travis Patterson Date: Thu, 14 Mar 2024 08:59:31 -0600 Subject: [PATCH 11/25] fix: add metadata resource and source attrs to stats (#12201) --- pkg/loghttp/push/otlp.go | 21 +++++++++++++-------- pkg/loghttp/push/otlp_test.go | 31 +++++++++++++++++++++++++++++++ pkg/loghttp/push/push.go | 22 ++++++++++++---------- 3 files changed, 56 insertions(+), 18 deletions(-) diff --git a/pkg/loghttp/push/otlp.go b/pkg/loghttp/push/otlp.go index 58a594b01221c..47a5959b1a333 100644 --- a/pkg/loghttp/push/otlp.go +++ b/pkg/loghttp/push/otlp.go @@ -30,8 +30,9 @@ const ( func newPushStats() *Stats { return &Stats{ - LogLinesBytes: map[time.Duration]int64{}, - StructuredMetadataBytes: map[time.Duration]int64{}, + LogLinesBytes: map[time.Duration]int64{}, + StructuredMetadataBytes: map[time.Duration]int64{}, + ResourceAndSourceMetadataLabels: map[time.Duration]push.LabelsAdapter{}, } } @@ -146,7 +147,10 @@ func otlpToLokiPushRequest(ld plog.Logs, userID string, tenantsRetention Tenants } resourceAttributesAsStructuredMetadataSize := labelsSize(resourceAttributesAsStructuredMetadata) - stats.StructuredMetadataBytes[tenantsRetention.RetentionPeriodFor(userID, lbs)] += int64(resourceAttributesAsStructuredMetadataSize) + retentionPeriodForUser := tenantsRetention.RetentionPeriodFor(userID, lbs) + + stats.StructuredMetadataBytes[retentionPeriodForUser] += int64(resourceAttributesAsStructuredMetadataSize) + stats.ResourceAndSourceMetadataLabels[retentionPeriodForUser] = append(stats.ResourceAndSourceMetadataLabels[retentionPeriodForUser], resourceAttributesAsStructuredMetadata...) for j := 0; j < sls.Len(); j++ { scope := sls.At(j).Scope() @@ -196,7 +200,8 @@ func otlpToLokiPushRequest(ld plog.Logs, userID string, tenantsRetention Tenants } scopeAttributesAsStructuredMetadataSize := labelsSize(scopeAttributesAsStructuredMetadata) - stats.StructuredMetadataBytes[tenantsRetention.RetentionPeriodFor(userID, lbs)] += int64(scopeAttributesAsStructuredMetadataSize) + stats.StructuredMetadataBytes[retentionPeriodForUser] += int64(scopeAttributesAsStructuredMetadataSize) + stats.ResourceAndSourceMetadataLabels[retentionPeriodForUser] = append(stats.ResourceAndSourceMetadataLabels[retentionPeriodForUser], scopeAttributesAsStructuredMetadata...) 
for k := 0; k < logs.Len(); k++ { log := logs.At(k) @@ -217,12 +222,12 @@ func otlpToLokiPushRequest(ld plog.Logs, userID string, tenantsRetention Tenants pushRequestsByStream[labelsStr] = stream metadataSize := int64(labelsSize(entry.StructuredMetadata) - resourceAttributesAsStructuredMetadataSize - scopeAttributesAsStructuredMetadataSize) - stats.StructuredMetadataBytes[tenantsRetention.RetentionPeriodFor(userID, lbs)] += metadataSize - stats.LogLinesBytes[tenantsRetention.RetentionPeriodFor(userID, lbs)] += int64(len(entry.Line)) + stats.StructuredMetadataBytes[retentionPeriodForUser] += metadataSize + stats.LogLinesBytes[retentionPeriodForUser] += int64(len(entry.Line)) if tracker != nil { - tracker.ReceivedBytesAdd(userID, tenantsRetention.RetentionPeriodFor(userID, lbs), lbs, float64(len(entry.Line))) - tracker.ReceivedBytesAdd(userID, tenantsRetention.RetentionPeriodFor(userID, lbs), lbs, float64(metadataSize)) + tracker.ReceivedBytesAdd(userID, retentionPeriodForUser, lbs, float64(len(entry.Line))) + tracker.ReceivedBytesAdd(userID, retentionPeriodForUser, lbs, float64(metadataSize)) } stats.NumLines++ diff --git a/pkg/loghttp/push/otlp_test.go b/pkg/loghttp/push/otlp_test.go index 3f8e3e8a92ca7..6cd0bacada00f 100644 --- a/pkg/loghttp/push/otlp_test.go +++ b/pkg/loghttp/push/otlp_test.go @@ -79,6 +79,9 @@ func TestOTLPToLokiPushRequest(t *testing.T) { StructuredMetadataBytes: map[time.Duration]int64{ time.Hour: 0, }, + ResourceAndSourceMetadataLabels: map[time.Duration]push.LabelsAdapter{ + time.Hour: nil, + }, StreamLabelsSize: 21, MostRecentEntryTimestamp: now, }, @@ -115,6 +118,9 @@ func TestOTLPToLokiPushRequest(t *testing.T) { StructuredMetadataBytes: map[time.Duration]int64{ time.Hour: 0, }, + ResourceAndSourceMetadataLabels: map[time.Duration]push.LabelsAdapter{ + time.Hour: nil, + }, StreamLabelsSize: 27, MostRecentEntryTimestamp: now, }, @@ -152,6 +158,9 @@ func TestOTLPToLokiPushRequest(t *testing.T) { StructuredMetadataBytes: map[time.Duration]int64{ time.Hour: 0, }, + ResourceAndSourceMetadataLabels: map[time.Duration]push.LabelsAdapter{ + time.Hour: nil, + }, StreamLabelsSize: 47, MostRecentEntryTimestamp: now, /* @@ -252,6 +261,13 @@ func TestOTLPToLokiPushRequest(t *testing.T) { StructuredMetadataBytes: map[time.Duration]int64{ time.Hour: 37, }, + ResourceAndSourceMetadataLabels: map[time.Duration]push.LabelsAdapter{ + time.Hour: []push.LabelAdapter{ + {Name: "service_image", Value: "loki"}, + {Name: "op", Value: "buzz"}, + {Name: "scope_name", Value: "fizz"}, + }, + }, StreamLabelsSize: 21, MostRecentEntryTimestamp: now, }, @@ -336,6 +352,13 @@ func TestOTLPToLokiPushRequest(t *testing.T) { StructuredMetadataBytes: map[time.Duration]int64{ time.Hour: 97, }, + ResourceAndSourceMetadataLabels: map[time.Duration]push.LabelsAdapter{ + time.Hour: []push.LabelAdapter{ + {Name: "resource_nested_foo", Value: "bar"}, + {Name: "scope_nested_foo", Value: "bar"}, + {Name: "scope_name", Value: "fizz"}, + }, + }, StreamLabelsSize: 21, MostRecentEntryTimestamp: now, }, @@ -479,6 +502,14 @@ func TestOTLPToLokiPushRequest(t *testing.T) { StructuredMetadataBytes: map[time.Duration]int64{ time.Hour: 113, }, + ResourceAndSourceMetadataLabels: map[time.Duration]push.LabelsAdapter{ + time.Hour: []push.LabelAdapter{ + {Name: "pod_ip", Value: "10.200.200.200"}, + {Name: "resource_nested_foo", Value: "bar"}, + {Name: "scope_nested_foo", Value: "bar"}, + {Name: "scope_name", Value: "fizz"}, + }, + }, StreamLabelsSize: 42, MostRecentEntryTimestamp: now, }, diff --git 
a/pkg/loghttp/push/push.go b/pkg/loghttp/push/push.go index f2acc8717539a..cb49b287d9411 100644 --- a/pkg/loghttp/push/push.go +++ b/pkg/loghttp/push/push.go @@ -5,6 +5,7 @@ import ( "compress/gzip" "fmt" "github.com/go-kit/log/level" + "github.com/grafana/loki/pkg/push" "io" "math" "mime" @@ -73,16 +74,17 @@ type RequestParser func(userID string, r *http.Request, tenantsRetention Tenants type RequestParserWrapper func(inner RequestParser) RequestParser type Stats struct { - Errs []error - NumLines int64 - LogLinesBytes map[time.Duration]int64 - StructuredMetadataBytes map[time.Duration]int64 - StreamLabelsSize int64 - MostRecentEntryTimestamp time.Time - ContentType string - ContentEncoding string - BodySize int64 - + Errs []error + NumLines int64 + LogLinesBytes map[time.Duration]int64 + StructuredMetadataBytes map[time.Duration]int64 + ResourceAndSourceMetadataLabels map[time.Duration]push.LabelsAdapter + StreamLabelsSize int64 + MostRecentEntryTimestamp time.Time + ContentType string + ContentEncoding string + + BodySize int64 // Extra is a place for a wrapped perser to record any interesting stats as key-value pairs to be logged Extra []any } From 4b28f8221b8ee1094347aed442d058d68bd9673f Mon Sep 17 00:00:00 2001 From: Salva Corts Date: Thu, 14 Mar 2024 16:17:35 +0100 Subject: [PATCH 12/25] fix: Read all series in NewTSDBSeriesIter and close index file (#12211) --- pkg/bloomcompactor/bloomcompactor.go | 2 +- pkg/bloomcompactor/controller.go | 2 +- pkg/bloomcompactor/tsdb.go | 129 +++++++++------------------ pkg/bloomcompactor/tsdb_test.go | 34 +++++-- pkg/storage/bloom/v1/util.go | 7 ++ 5 files changed, 78 insertions(+), 96 deletions(-) diff --git a/pkg/bloomcompactor/bloomcompactor.go b/pkg/bloomcompactor/bloomcompactor.go index 6707213ce64bb..ddfe552cb2ad8 100644 --- a/pkg/bloomcompactor/bloomcompactor.go +++ b/pkg/bloomcompactor/bloomcompactor.go @@ -80,7 +80,7 @@ func New( bloomStore: store, } - tsdbStore, err := NewTSDBStores(schemaCfg, storeCfg, clientMetrics) + tsdbStore, err := NewTSDBStores(schemaCfg, storeCfg, clientMetrics, logger) if err != nil { return nil, errors.Wrap(err, "failed to create TSDB store") } diff --git a/pkg/bloomcompactor/controller.go b/pkg/bloomcompactor/controller.go index 55fd7548881bf..2e0a27e68904a 100644 --- a/pkg/bloomcompactor/controller.go +++ b/pkg/bloomcompactor/controller.go @@ -285,7 +285,7 @@ func (s *SimpleBloomController) loadWorkForGap( tenant string, id tsdb.Identifier, gap gapWithBlocks, -) (v1.CloseableIterator[*v1.Series], v1.CloseableResettableIterator[*v1.SeriesWithBloom], error) { +) (v1.Iterator[*v1.Series], v1.CloseableResettableIterator[*v1.SeriesWithBloom], error) { // load a series iterator for the gap seriesItr, err := s.tsdbStore.LoadTSDB(ctx, table, tenant, id, gap.bounds) if err != nil { diff --git a/pkg/bloomcompactor/tsdb.go b/pkg/bloomcompactor/tsdb.go index aa192c251f5f7..ddfd78c2974b4 100644 --- a/pkg/bloomcompactor/tsdb.go +++ b/pkg/bloomcompactor/tsdb.go @@ -7,8 +7,9 @@ import ( "math" "path" "strings" - "sync" + "github.com/go-kit/log" + "github.com/go-kit/log/level" "github.com/pkg/errors" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/model/labels" @@ -35,18 +36,20 @@ type TSDBStore interface { tenant string, id tsdb.Identifier, bounds v1.FingerprintBounds, - ) (v1.CloseableIterator[*v1.Series], error) + ) (v1.Iterator[*v1.Series], error) } // BloomTSDBStore is a wrapper around the storage.Client interface which // implements the TSDBStore interface for this pkg. 
type BloomTSDBStore struct { storage storage.Client + logger log.Logger } -func NewBloomTSDBStore(storage storage.Client) *BloomTSDBStore { +func NewBloomTSDBStore(storage storage.Client, logger log.Logger) *BloomTSDBStore { return &BloomTSDBStore{ storage: storage, + logger: logger, } } @@ -85,7 +88,7 @@ func (b *BloomTSDBStore) LoadTSDB( tenant string, id tsdb.Identifier, bounds v1.FingerprintBounds, -) (v1.CloseableIterator[*v1.Series], error) { +) (v1.Iterator[*v1.Series], error) { withCompression := id.Name() + gzipExtension data, err := b.storage.GetUserFile(ctx, table.Addr(), tenant, withCompression) @@ -112,8 +115,13 @@ func (b *BloomTSDBStore) LoadTSDB( } idx := tsdb.NewTSDBIndex(reader) + defer func() { + if err := idx.Close(); err != nil { + level.Error(b.logger).Log("msg", "failed to close index", "err", err) + } + }() - return NewTSDBSeriesIter(ctx, idx, bounds), nil + return NewTSDBSeriesIter(ctx, idx, bounds) } // TSDBStore is an interface for interacting with the TSDB, @@ -127,74 +135,21 @@ type forSeries interface { fn func(labels.Labels, model.Fingerprint, []index.ChunkMeta), matchers ...*labels.Matcher, ) error - Close() error -} - -type TSDBSeriesIter struct { - mtx sync.Mutex - f forSeries - bounds v1.FingerprintBounds - ctx context.Context - - ch chan *v1.Series - initialized bool - next *v1.Series - err error -} - -func NewTSDBSeriesIter(ctx context.Context, f forSeries, bounds v1.FingerprintBounds) *TSDBSeriesIter { - return &TSDBSeriesIter{ - f: f, - bounds: bounds, - ctx: ctx, - ch: make(chan *v1.Series), - } -} - -func (t *TSDBSeriesIter) Next() bool { - if !t.initialized { - t.initialized = true - t.background() - } - - select { - case <-t.ctx.Done(): - return false - case next, ok := <-t.ch: - t.next = next - return ok - } -} - -func (t *TSDBSeriesIter) At() *v1.Series { - return t.next } -func (t *TSDBSeriesIter) Err() error { - t.mtx.Lock() - defer t.mtx.Unlock() - - if t.err != nil { - return t.err - } - - return t.ctx.Err() -} - -func (t *TSDBSeriesIter) Close() error { - return t.f.Close() -} - -// background iterates over the tsdb file, populating the next -// value via a channel to handle backpressure -func (t *TSDBSeriesIter) background() { - go func() { - err := t.f.ForSeries( - t.ctx, - t.bounds, - 0, math.MaxInt64, - func(_ labels.Labels, fp model.Fingerprint, chks []index.ChunkMeta) { - +func NewTSDBSeriesIter(ctx context.Context, f forSeries, bounds v1.FingerprintBounds) (v1.Iterator[*v1.Series], error) { + // TODO(salvacorts): Create a pool + series := make([]*v1.Series, 0, 100) + + if err := f.ForSeries( + ctx, + bounds, + 0, math.MaxInt64, + func(_ labels.Labels, fp model.Fingerprint, chks []index.ChunkMeta) { + select { + case <-ctx.Done(): + return + default: res := &v1.Series{ Fingerprint: fp, Chunks: make(v1.ChunkRefs, 0, len(chks)), @@ -207,19 +162,20 @@ func (t *TSDBSeriesIter) background() { }) } - select { - case <-t.ctx.Done(): - return - case t.ch <- res: - } - }, - labels.MustNewMatcher(labels.MatchEqual, "", ""), - ) - t.mtx.Lock() - t.err = err - t.mtx.Unlock() - close(t.ch) - }() + series = append(series, res) + } + }, + labels.MustNewMatcher(labels.MatchEqual, "", ""), + ); err != nil { + return nil, err + } + + select { + case <-ctx.Done(): + return v1.NewEmptyIter[*v1.Series](), ctx.Err() + default: + return v1.NewCancelableIter[*v1.Series](ctx, v1.NewSliceIter[*v1.Series](series)), nil + } } type TSDBStores struct { @@ -231,6 +187,7 @@ func NewTSDBStores( schemaCfg config.SchemaConfig, storeCfg baseStore.Config, clientMetrics 
baseStore.ClientMetrics, + logger log.Logger, ) (*TSDBStores, error) { res := &TSDBStores{ schemaCfg: schemaCfg, @@ -244,7 +201,7 @@ func NewTSDBStores( if err != nil { return nil, errors.Wrap(err, "failed to create object client") } - res.stores[i] = NewBloomTSDBStore(storage.NewIndexStorageClient(c, cfg.IndexTables.PathPrefix)) + res.stores[i] = NewBloomTSDBStore(storage.NewIndexStorageClient(c, cfg.IndexTables.PathPrefix), logger) } } @@ -303,7 +260,7 @@ func (s *TSDBStores) LoadTSDB( tenant string, id tsdb.Identifier, bounds v1.FingerprintBounds, -) (v1.CloseableIterator[*v1.Series], error) { +) (v1.Iterator[*v1.Series], error) { store, err := s.storeForPeriod(table.DayTime) if err != nil { return nil, err diff --git a/pkg/bloomcompactor/tsdb_test.go b/pkg/bloomcompactor/tsdb_test.go index b0ffd4ce75be0..91ad1719375ac 100644 --- a/pkg/bloomcompactor/tsdb_test.go +++ b/pkg/bloomcompactor/tsdb_test.go @@ -61,7 +61,8 @@ func TestTSDBSeriesIter(t *testing.T) { }, } srcItr := v1.NewSliceIter(input) - itr := NewTSDBSeriesIter(context.Background(), forSeriesTestImpl(input), v1.NewBounds(0, math.MaxUint64)) + itr, err := NewTSDBSeriesIter(context.Background(), forSeriesTestImpl(input), v1.NewBounds(0, math.MaxUint64)) + require.NoError(t, err) v1.EqualIterators[*v1.Series]( t, @@ -74,13 +75,30 @@ func TestTSDBSeriesIter(t *testing.T) { } func TestTSDBSeriesIter_Expiry(t *testing.T) { - ctx, cancel := context.WithCancel(context.Background()) - cancel() - itr := NewTSDBSeriesIter(ctx, forSeriesTestImpl{ - {}, // a single entry - }, v1.NewBounds(0, math.MaxUint64)) + t.Run("expires on creation", func(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + cancel() + itr, err := NewTSDBSeriesIter(ctx, forSeriesTestImpl{ + {}, // a single entry + }, v1.NewBounds(0, math.MaxUint64)) + require.Error(t, err) + require.False(t, itr.Next()) + }) - require.False(t, itr.Next()) - require.Error(t, itr.Err()) + t.Run("expires during consumption", func(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + itr, err := NewTSDBSeriesIter(ctx, forSeriesTestImpl{ + {}, + {}, + }, v1.NewBounds(0, math.MaxUint64)) + require.NoError(t, err) + + require.True(t, itr.Next()) + require.NoError(t, itr.Err()) + + cancel() + require.False(t, itr.Next()) + require.Error(t, itr.Err()) + }) } diff --git a/pkg/storage/bloom/v1/util.go b/pkg/storage/bloom/v1/util.go index c0b9ffab13071..e96af779cc186 100644 --- a/pkg/storage/bloom/v1/util.go +++ b/pkg/storage/bloom/v1/util.go @@ -222,6 +222,13 @@ func (cii *CancellableIter[T]) Next() bool { } } +func (cii *CancellableIter[T]) Err() error { + if err := cii.ctx.Err(); err != nil { + return err + } + return cii.Iterator.Err() +} + func NewCancelableIter[T any](ctx context.Context, itr Iterator[T]) *CancellableIter[T] { return &CancellableIter[T]{ctx: ctx, Iterator: itr} } From 8abfc29e44807c35402a10b2425f3e7250525838 Mon Sep 17 00:00:00 2001 From: Christian Haudum Date: Thu, 14 Mar 2024 17:28:13 +0100 Subject: [PATCH 13/25] chore(bloom-gw): Process blocks in parallel (#12203) While processing a single block is rather fast, processing a lot of blocks sequentially can lead to problem that single slowly processed blocks lead to high tail latency. 
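
As an illustrative sketch only: the change below bounds block processing with dskit's `concurrency.ForEachJob`, so a single slow block no longer delays every other block in the request. The helper name `processOneBlock`, the job count, and the worker count of 10 are placeholders for this example, not part of the change itself.

```go
package main

import (
	"context"
	"fmt"
	"time"

	"github.com/grafana/dskit/concurrency"
)

// processOneBlock stands in for running the filter tasks of a request
// against a single bloom block querier.
func processOneBlock(_ context.Context, idx int) error {
	time.Sleep(10 * time.Millisecond) // simulate per-block work
	fmt.Println("processed block", idx)
	return nil
}

func main() {
	ctx := context.Background()
	numBlocks := 25

	// ForEachJob runs the jobs with at most 10 in flight at once, waits for
	// them to finish, and returns the first error it sees, skipping jobs that
	// have not started yet, instead of processing the blocks one after another.
	if err := concurrency.ForEachJob(ctx, numBlocks, 10, processOneBlock); err != nil {
		fmt.Println("error:", err)
	}
}
```

The concurrency of 10 mirrors the hard-coded value in the diff; as the TODO in the change notes, the right value is still an open question.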
Signed-off-by: Christian Haudum --- pkg/bloomgateway/bloomgateway.go | 12 +++++----- pkg/bloomgateway/multiplexing.go | 40 +++++++++++++++----------------- pkg/bloomgateway/processor.go | 24 +++++++++++-------- 3 files changed, 39 insertions(+), 37 deletions(-) diff --git a/pkg/bloomgateway/bloomgateway.go b/pkg/bloomgateway/bloomgateway.go index b78e80beefa03..515b27b01b2cf 100644 --- a/pkg/bloomgateway/bloomgateway.go +++ b/pkg/bloomgateway/bloomgateway.go @@ -44,6 +44,7 @@ package bloomgateway import ( "context" "fmt" + "sort" "time" "github.com/go-kit/log" @@ -347,14 +348,11 @@ func (g *Gateway) FilterChunkRefs(ctx context.Context, req *logproto.FilterChunk // Once the tasks is closed, it will send the task with the results from the // block querier to the supplied task channel. func (g *Gateway) consumeTask(ctx context.Context, task Task, tasksCh chan<- Task) { - logger := log.With(g.logger, "task", task.ID) - for res := range task.resCh { select { case <-ctx.Done(): - level.Debug(logger).Log("msg", "drop partial result", "fp_int", uint64(res.Fp), "fp_hex", res.Fp, "chunks_to_remove", res.Removals.Len()) + // do nothing default: - level.Debug(logger).Log("msg", "accept partial result", "fp_int", uint64(res.Fp), "fp_hex", res.Fp, "chunks_to_remove", res.Removals.Len()) task.responses = append(task.responses, res) } } @@ -368,18 +366,20 @@ func (g *Gateway) consumeTask(ctx context.Context, task Task, tasksCh chan<- Tas } } -// merges a list of responses via a heap. The same fingerprints and chunks can be present in multiple responses, -// but each response must be ordered by fingerprint +// merges a list of responses via a heap. The same fingerprints and chunks can be present in multiple responses. +// Individual responses do not need to be be ordered beforehand. 
func orderedResponsesByFP(responses [][]v1.Output) v1.Iterator[v1.Output] { if len(responses) == 0 { return v1.NewEmptyIter[v1.Output]() } if len(responses) == 1 { + sort.Slice(responses[0], func(i, j int) bool { return responses[0][i].Fp < responses[0][j].Fp }) return v1.NewSliceIter(responses[0]) } itrs := make([]v1.PeekingIterator[v1.Output], 0, len(responses)) for _, r := range responses { + sort.Slice(r, func(i, j int) bool { return r[i].Fp < r[j].Fp }) itrs = append(itrs, v1.NewPeekingIter(v1.NewSliceIter(r))) } return v1.NewHeapIterator[v1.Output]( diff --git a/pkg/bloomgateway/multiplexing.go b/pkg/bloomgateway/multiplexing.go index 8279dda99a595..97e0b0aa6d66f 100644 --- a/pkg/bloomgateway/multiplexing.go +++ b/pkg/bloomgateway/multiplexing.go @@ -87,17 +87,16 @@ func NewTask(ctx context.Context, tenantID string, refs seriesWithInterval, filt } task := Task{ - ID: key, - Tenant: tenantID, - err: new(wrappedError), - resCh: make(chan v1.Output), - filters: filters, - series: refs.series, - interval: refs.interval, - table: refs.day, - ctx: ctx, - done: make(chan struct{}), - responses: make([]v1.Output, 0, len(refs.series)), + ID: key, + Tenant: tenantID, + err: new(wrappedError), + resCh: make(chan v1.Output), + filters: filters, + series: refs.series, + interval: refs.interval, + table: refs.day, + ctx: ctx, + done: make(chan struct{}), } return task, nil } @@ -130,16 +129,15 @@ func (t Task) CloseWithError(err error) { func (t Task) Copy(series []*logproto.GroupedChunkRefs) Task { // do not copy ID to distinguish it as copied task return Task{ - Tenant: t.Tenant, - err: t.err, - resCh: t.resCh, - filters: t.filters, - series: series, - interval: t.interval, - table: t.table, - ctx: t.ctx, - done: make(chan struct{}), - responses: make([]v1.Output, 0, len(series)), + Tenant: t.Tenant, + err: t.err, + resCh: t.resCh, + filters: t.filters, + series: series, + interval: t.interval, + table: t.table, + ctx: t.ctx, + done: make(chan struct{}), } } diff --git a/pkg/bloomgateway/processor.go b/pkg/bloomgateway/processor.go index 90e3f6f730c93..9a503551d3d23 100644 --- a/pkg/bloomgateway/processor.go +++ b/pkg/bloomgateway/processor.go @@ -10,6 +10,7 @@ import ( "github.com/opentracing/opentracing-go" "github.com/pkg/errors" + "github.com/grafana/dskit/concurrency" v1 "github.com/grafana/loki/pkg/storage/bloom/v1" "github.com/grafana/loki/pkg/storage/config" "github.com/grafana/loki/pkg/storage/stores/shipper/bloomshipper" @@ -93,31 +94,34 @@ func (p *processor) processBlocks(ctx context.Context, data []blockWithTasks) er return err } - // TODO(chaudum): use `concurrency` lib with bound parallelism - for i, bq := range bqs { - block := data[i] + // TODO(chaudum): What's a good cocurrency value? 
+ return concurrency.ForEachJob(ctx, len(bqs), 10, func(ctx context.Context, i int) error { + bq := bqs[i] if bq == nil { // TODO(chaudum): Add metric for skipped blocks - continue + return nil } + defer bq.Close() + + block := data[i] level.Debug(p.logger).Log( "msg", "process block with tasks", + "job", i+1, + "of_jobs", len(bqs), "block", block.ref, - "block_bounds", block.ref.Bounds, - "querier_bounds", bq.Bounds, "num_tasks", len(block.tasks), ) + if !block.ref.Bounds.Equal(bq.Bounds) { - bq.Close() return errors.Errorf("block and querier bounds differ: %s vs %s", block.ref.Bounds, bq.Bounds) } + err := p.processBlock(ctx, bq.BlockQuerier, block.tasks) - bq.Close() if err != nil { return errors.Wrap(err, "processing block") } - } - return nil + return nil + }) } func (p *processor) processBlock(_ context.Context, blockQuerier *v1.BlockQuerier, tasks []Task) error { From f0cf65b3c38de739443bfd017345931412621645 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20Ho=C3=9F?= Date: Thu, 14 Mar 2024 18:10:07 +0100 Subject: [PATCH 14/25] fix: allow to configure http_config for ruler (#12113) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Sebastian Hoß Co-authored-by: Michel Hollands <42814411+MichelHollands@users.noreply.github.com> --- production/helm/loki/CHANGELOG.md | 6 +++++- production/helm/loki/Chart.yaml | 2 +- production/helm/loki/README.md | 2 +- production/helm/loki/templates/_helpers.tpl | 3 +++ 4 files changed, 10 insertions(+), 3 deletions(-) diff --git a/production/helm/loki/CHANGELOG.md b/production/helm/loki/CHANGELOG.md index 8450e4efab11a..2e372fe1d1ffd 100644 --- a/production/helm/loki/CHANGELOG.md +++ b/production/helm/loki/CHANGELOG.md @@ -13,9 +13,13 @@ Entries should include a reference to the pull request that introduced the chang [//]: # ( : do not remove this line. This locator is used by the CI pipeline to automatically create a changelog entry for each new Loki release. Add other chart versions and respective changelog entries bellow this line.) +## 5.43.7 + +- [BUGFIX] allow to configure http_config for ruler + ## 5.43.6 - [ENHANCEMENT] Add `ciliumnetworkpolicy` with egress to world for table-manager if enabled. +- [ENHANCEMENT] Add `ciliumnetworkpolicy` with egress to world for table-manager if enabled. 
## 5.43.5 diff --git a/production/helm/loki/Chart.yaml b/production/helm/loki/Chart.yaml index 904b04e1d0e25..025037f888807 100644 --- a/production/helm/loki/Chart.yaml +++ b/production/helm/loki/Chart.yaml @@ -3,7 +3,7 @@ name: loki description: Helm chart for Grafana Loki in simple, scalable mode type: application appVersion: 2.9.4 -version: 5.43.6 +version: 5.43.7 home: https://grafana.github.io/helm-charts sources: - https://github.com/grafana/loki diff --git a/production/helm/loki/README.md b/production/helm/loki/README.md index 652cf73679fe6..46c31bb2410b8 100644 --- a/production/helm/loki/README.md +++ b/production/helm/loki/README.md @@ -1,6 +1,6 @@ # loki -![Version: 5.43.6](https://img.shields.io/badge/Version-5.43.6-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 2.9.4](https://img.shields.io/badge/AppVersion-2.9.4-informational?style=flat-square) +![Version: 5.43.7](https://img.shields.io/badge/Version-5.43.7-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 2.9.4](https://img.shields.io/badge/AppVersion-2.9.4-informational?style=flat-square) Helm chart for Grafana Loki in simple, scalable mode diff --git a/production/helm/loki/templates/_helpers.tpl b/production/helm/loki/templates/_helpers.tpl index 502c7650010b6..06884953103fc 100644 --- a/production/helm/loki/templates/_helpers.tpl +++ b/production/helm/loki/templates/_helpers.tpl @@ -362,6 +362,9 @@ s3: {{- end }} s3forcepathstyle: {{ .s3ForcePathStyle }} insecure: {{ .insecure }} + {{- with .http_config }} + http_config: {{ toYaml . | nindent 6 }} + {{- end }} {{- end -}} {{- else if eq .Values.loki.storage.type "gcs" -}} {{- with .Values.loki.storage.gcs }} From 862d0fb2cf562e347591d7a39aa26dfbc11855a6 Mon Sep 17 00:00:00 2001 From: Periklis Tsirakidis Date: Thu, 14 Mar 2024 19:57:09 +0100 Subject: [PATCH 15/25] fix(operator): Use safe bearer token authentication to scrape operator metrics (#12164) --- operator/CHANGELOG.md | 1 + ...ager-metrics-reader_v1_serviceaccount.yaml | 11 +++++++++++ ...oller-manager-metrics-token_v1_secret.yaml | 13 +++++++++++++ ...rization.k8s.io_v1_clusterrolebinding.yaml | 19 +++++++++++++++++++ ...nitoring.coreos.com_v1_servicemonitor.yaml | 11 +++++++++-- .../loki-operator.clusterserviceversion.yaml | 5 +++-- ...ager-metrics-reader_v1_serviceaccount.yaml | 11 +++++++++++ ...rization.k8s.io_v1_clusterrolebinding.yaml | 19 +++++++++++++++++++ .../loki-operator.clusterserviceversion.yaml | 5 +++-- ...ager-metrics-reader_v1_serviceaccount.yaml | 11 +++++++++++ ...oller-manager-metrics-token_v1_secret.yaml | 13 +++++++++++++ ...rization.k8s.io_v1_clusterrolebinding.yaml | 19 +++++++++++++++++++ ...nitoring.coreos.com_v1_servicemonitor.yaml | 11 +++++++++-- .../loki-operator.clusterserviceversion.yaml | 5 +++-- operator/config/manager/manager.yaml | 1 + .../prometheus_service_monitor_patch.yaml | 13 ++++++++++--- .../overlays/openshift/kustomization.yaml | 1 + .../manager_metrics_secret_token.yaml | 7 +++++++ .../prometheus_service_monitor_patch.yaml | 13 ++++++++++--- .../auth_proxy_client_clusterrolebinding.yaml | 12 ++++++++++++ .../auth_proxy_client_serviceaccount.yaml | 5 +++++ .../config/rbac/auth_proxy_role_binding.yaml | 2 +- operator/config/rbac/kustomization.yaml | 3 +++ .../rbac/leader_election_role_binding.yaml | 2 +- operator/config/rbac/role_binding.yaml | 2 +- 
operator/config/rbac/serviceaccount.yaml | 5 +++++ 26 files changed, 201 insertions(+), 19 deletions(-) create mode 100644 operator/bundle/community-openshift/manifests/loki-operator-controller-manager-metrics-reader_v1_serviceaccount.yaml create mode 100644 operator/bundle/community-openshift/manifests/loki-operator-controller-manager-metrics-token_v1_secret.yaml create mode 100644 operator/bundle/community-openshift/manifests/loki-operator-controller-manager-read-metrics_rbac.authorization.k8s.io_v1_clusterrolebinding.yaml create mode 100644 operator/bundle/community/manifests/loki-operator-controller-manager-metrics-reader_v1_serviceaccount.yaml create mode 100644 operator/bundle/community/manifests/loki-operator-controller-manager-read-metrics_rbac.authorization.k8s.io_v1_clusterrolebinding.yaml create mode 100644 operator/bundle/openshift/manifests/loki-operator-controller-manager-metrics-reader_v1_serviceaccount.yaml create mode 100644 operator/bundle/openshift/manifests/loki-operator-controller-manager-metrics-token_v1_secret.yaml create mode 100644 operator/bundle/openshift/manifests/loki-operator-controller-manager-read-metrics_rbac.authorization.k8s.io_v1_clusterrolebinding.yaml create mode 100644 operator/config/overlays/openshift/manager_metrics_secret_token.yaml create mode 100644 operator/config/rbac/auth_proxy_client_clusterrolebinding.yaml create mode 100644 operator/config/rbac/auth_proxy_client_serviceaccount.yaml create mode 100644 operator/config/rbac/serviceaccount.yaml diff --git a/operator/CHANGELOG.md b/operator/CHANGELOG.md index 08c2a5a1c8f8a..b4aca5f3c4446 100644 --- a/operator/CHANGELOG.md +++ b/operator/CHANGELOG.md @@ -1,5 +1,6 @@ ## Main +- [12164](https://github.com/grafana/loki/pull/12164) **periklis**: Use safe bearer token authentication to scrape operator metrics - [12216](https://github.com/grafana/loki/pull/12216) **xperimental**: Fix duplicate operator metrics due to ServiceMonitor selector - [12212](https://github.com/grafana/loki/pull/12212) **xperimental**: Keep credentialMode in status when updating schemas - [12165](https://github.com/grafana/loki/pull/12165) **JoaoBraveCoding**: Change attribute value used for CCO-based credential mode diff --git a/operator/bundle/community-openshift/manifests/loki-operator-controller-manager-metrics-reader_v1_serviceaccount.yaml b/operator/bundle/community-openshift/manifests/loki-operator-controller-manager-metrics-reader_v1_serviceaccount.yaml new file mode 100644 index 0000000000000..9c9303fdc7104 --- /dev/null +++ b/operator/bundle/community-openshift/manifests/loki-operator-controller-manager-metrics-reader_v1_serviceaccount.yaml @@ -0,0 +1,11 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + creationTimestamp: null + labels: + app.kubernetes.io/instance: loki-operator-v0.5.0 + app.kubernetes.io/managed-by: operator-lifecycle-manager + app.kubernetes.io/name: loki-operator + app.kubernetes.io/part-of: loki-operator + app.kubernetes.io/version: 0.5.0 + name: loki-operator-controller-manager-metrics-reader diff --git a/operator/bundle/community-openshift/manifests/loki-operator-controller-manager-metrics-token_v1_secret.yaml b/operator/bundle/community-openshift/manifests/loki-operator-controller-manager-metrics-token_v1_secret.yaml new file mode 100644 index 0000000000000..9cae4a32e437f --- /dev/null +++ b/operator/bundle/community-openshift/manifests/loki-operator-controller-manager-metrics-token_v1_secret.yaml @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: Secret +metadata: + annotations: + 
kubernetes.io/service-account.name: loki-operator-controller-manager-metrics-reader + labels: + app.kubernetes.io/instance: loki-operator-v0.5.0 + app.kubernetes.io/managed-by: operator-lifecycle-manager + app.kubernetes.io/name: loki-operator + app.kubernetes.io/part-of: loki-operator + app.kubernetes.io/version: 0.5.0 + name: loki-operator-controller-manager-metrics-token +type: kubernetes.io/service-account-token diff --git a/operator/bundle/community-openshift/manifests/loki-operator-controller-manager-read-metrics_rbac.authorization.k8s.io_v1_clusterrolebinding.yaml b/operator/bundle/community-openshift/manifests/loki-operator-controller-manager-read-metrics_rbac.authorization.k8s.io_v1_clusterrolebinding.yaml new file mode 100644 index 0000000000000..b966e06579099 --- /dev/null +++ b/operator/bundle/community-openshift/manifests/loki-operator-controller-manager-read-metrics_rbac.authorization.k8s.io_v1_clusterrolebinding.yaml @@ -0,0 +1,19 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + creationTimestamp: null + labels: + app.kubernetes.io/instance: loki-operator-v0.5.0 + app.kubernetes.io/managed-by: operator-lifecycle-manager + app.kubernetes.io/name: loki-operator + app.kubernetes.io/part-of: loki-operator + app.kubernetes.io/version: 0.5.0 + name: loki-operator-controller-manager-read-metrics +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: loki-operator-metrics-reader +subjects: +- kind: ServiceAccount + name: loki-operator-controller-manager-metrics-reader + namespace: kubernetes-operators diff --git a/operator/bundle/community-openshift/manifests/loki-operator-metrics-monitor_monitoring.coreos.com_v1_servicemonitor.yaml b/operator/bundle/community-openshift/manifests/loki-operator-metrics-monitor_monitoring.coreos.com_v1_servicemonitor.yaml index 3f698d26b4761..eeebada3645f7 100644 --- a/operator/bundle/community-openshift/manifests/loki-operator-metrics-monitor_monitoring.coreos.com_v1_servicemonitor.yaml +++ b/operator/bundle/community-openshift/manifests/loki-operator-metrics-monitor_monitoring.coreos.com_v1_servicemonitor.yaml @@ -11,14 +11,21 @@ metadata: name: loki-operator-metrics-monitor spec: endpoints: - - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + - authorization: + credentials: + key: token + name: loki-operator-controller-manager-metrics-token + type: bearer interval: 30s path: /metrics scheme: https scrapeTimeout: 10s targetPort: 8443 tlsConfig: - caFile: /etc/prometheus/configmaps/serving-certs-ca-bundle/service-ca.crt + ca: + secret: + key: service-ca.crt + name: loki-operator-controller-manager-metrics-token serverName: loki-operator-controller-manager-metrics-service.kubernetes-operators.svc selector: matchLabels: diff --git a/operator/bundle/community-openshift/manifests/loki-operator.clusterserviceversion.yaml b/operator/bundle/community-openshift/manifests/loki-operator.clusterserviceversion.yaml index 282c59b85c8de..b4e86a8a4742c 100644 --- a/operator/bundle/community-openshift/manifests/loki-operator.clusterserviceversion.yaml +++ b/operator/bundle/community-openshift/manifests/loki-operator.clusterserviceversion.yaml @@ -1678,7 +1678,7 @@ spec: - subjectaccessreviews verbs: - create - serviceAccountName: default + serviceAccountName: loki-operator-controller-manager deployments: - label: app.kubernetes.io/instance: loki-operator-v0.5.0 @@ -1779,6 +1779,7 @@ spec: runAsNonRoot: true seccompProfile: type: RuntimeDefault + serviceAccountName: 
loki-operator-controller-manager terminationGracePeriodSeconds: 10 volumes: - configMap: @@ -1812,7 +1813,7 @@ spec: verbs: - create - patch - serviceAccountName: default + serviceAccountName: loki-operator-controller-manager strategy: deployment installModes: - supported: false diff --git a/operator/bundle/community/manifests/loki-operator-controller-manager-metrics-reader_v1_serviceaccount.yaml b/operator/bundle/community/manifests/loki-operator-controller-manager-metrics-reader_v1_serviceaccount.yaml new file mode 100644 index 0000000000000..9c9303fdc7104 --- /dev/null +++ b/operator/bundle/community/manifests/loki-operator-controller-manager-metrics-reader_v1_serviceaccount.yaml @@ -0,0 +1,11 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + creationTimestamp: null + labels: + app.kubernetes.io/instance: loki-operator-v0.5.0 + app.kubernetes.io/managed-by: operator-lifecycle-manager + app.kubernetes.io/name: loki-operator + app.kubernetes.io/part-of: loki-operator + app.kubernetes.io/version: 0.5.0 + name: loki-operator-controller-manager-metrics-reader diff --git a/operator/bundle/community/manifests/loki-operator-controller-manager-read-metrics_rbac.authorization.k8s.io_v1_clusterrolebinding.yaml b/operator/bundle/community/manifests/loki-operator-controller-manager-read-metrics_rbac.authorization.k8s.io_v1_clusterrolebinding.yaml new file mode 100644 index 0000000000000..5566aa3280695 --- /dev/null +++ b/operator/bundle/community/manifests/loki-operator-controller-manager-read-metrics_rbac.authorization.k8s.io_v1_clusterrolebinding.yaml @@ -0,0 +1,19 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + creationTimestamp: null + labels: + app.kubernetes.io/instance: loki-operator-v0.5.0 + app.kubernetes.io/managed-by: operator-lifecycle-manager + app.kubernetes.io/name: loki-operator + app.kubernetes.io/part-of: loki-operator + app.kubernetes.io/version: 0.5.0 + name: loki-operator-controller-manager-read-metrics +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: loki-operator-metrics-reader +subjects: +- kind: ServiceAccount + name: loki-operator-controller-manager-metrics-reader + namespace: loki-operator diff --git a/operator/bundle/community/manifests/loki-operator.clusterserviceversion.yaml b/operator/bundle/community/manifests/loki-operator.clusterserviceversion.yaml index 70e8484817b19..b2797ebaf8a64 100644 --- a/operator/bundle/community/manifests/loki-operator.clusterserviceversion.yaml +++ b/operator/bundle/community/manifests/loki-operator.clusterserviceversion.yaml @@ -1658,7 +1658,7 @@ spec: - subjectaccessreviews verbs: - create - serviceAccountName: default + serviceAccountName: loki-operator-controller-manager deployments: - label: app.kubernetes.io/instance: loki-operator-v0.5.0 @@ -1748,6 +1748,7 @@ spec: kubernetes.io/os: linux securityContext: runAsNonRoot: true + serviceAccountName: loki-operator-controller-manager terminationGracePeriodSeconds: 10 volumes: - name: webhook-cert @@ -1780,7 +1781,7 @@ spec: verbs: - create - patch - serviceAccountName: default + serviceAccountName: loki-operator-controller-manager strategy: deployment installModes: - supported: false diff --git a/operator/bundle/openshift/manifests/loki-operator-controller-manager-metrics-reader_v1_serviceaccount.yaml b/operator/bundle/openshift/manifests/loki-operator-controller-manager-metrics-reader_v1_serviceaccount.yaml new file mode 100644 index 0000000000000..7a2ab31a78a4d --- /dev/null +++ 
b/operator/bundle/openshift/manifests/loki-operator-controller-manager-metrics-reader_v1_serviceaccount.yaml @@ -0,0 +1,11 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + creationTimestamp: null + labels: + app.kubernetes.io/instance: loki-operator-0.1.0 + app.kubernetes.io/managed-by: operator-lifecycle-manager + app.kubernetes.io/name: loki-operator + app.kubernetes.io/part-of: cluster-logging + app.kubernetes.io/version: 0.1.0 + name: loki-operator-controller-manager-metrics-reader diff --git a/operator/bundle/openshift/manifests/loki-operator-controller-manager-metrics-token_v1_secret.yaml b/operator/bundle/openshift/manifests/loki-operator-controller-manager-metrics-token_v1_secret.yaml new file mode 100644 index 0000000000000..8abb584b4ea23 --- /dev/null +++ b/operator/bundle/openshift/manifests/loki-operator-controller-manager-metrics-token_v1_secret.yaml @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: Secret +metadata: + annotations: + kubernetes.io/service-account.name: loki-operator-controller-manager-metrics-reader + labels: + app.kubernetes.io/instance: loki-operator-0.1.0 + app.kubernetes.io/managed-by: operator-lifecycle-manager + app.kubernetes.io/name: loki-operator + app.kubernetes.io/part-of: cluster-logging + app.kubernetes.io/version: 0.1.0 + name: loki-operator-controller-manager-metrics-token +type: kubernetes.io/service-account-token diff --git a/operator/bundle/openshift/manifests/loki-operator-controller-manager-read-metrics_rbac.authorization.k8s.io_v1_clusterrolebinding.yaml b/operator/bundle/openshift/manifests/loki-operator-controller-manager-read-metrics_rbac.authorization.k8s.io_v1_clusterrolebinding.yaml new file mode 100644 index 0000000000000..040591c9f7bb2 --- /dev/null +++ b/operator/bundle/openshift/manifests/loki-operator-controller-manager-read-metrics_rbac.authorization.k8s.io_v1_clusterrolebinding.yaml @@ -0,0 +1,19 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + creationTimestamp: null + labels: + app.kubernetes.io/instance: loki-operator-0.1.0 + app.kubernetes.io/managed-by: operator-lifecycle-manager + app.kubernetes.io/name: loki-operator + app.kubernetes.io/part-of: cluster-logging + app.kubernetes.io/version: 0.1.0 + name: loki-operator-controller-manager-read-metrics +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: loki-operator-metrics-reader +subjects: +- kind: ServiceAccount + name: loki-operator-controller-manager-metrics-reader + namespace: openshift-operators-redhat diff --git a/operator/bundle/openshift/manifests/loki-operator-metrics-monitor_monitoring.coreos.com_v1_servicemonitor.yaml b/operator/bundle/openshift/manifests/loki-operator-metrics-monitor_monitoring.coreos.com_v1_servicemonitor.yaml index 7c62cf0585190..bdb0ee3344d2a 100644 --- a/operator/bundle/openshift/manifests/loki-operator-metrics-monitor_monitoring.coreos.com_v1_servicemonitor.yaml +++ b/operator/bundle/openshift/manifests/loki-operator-metrics-monitor_monitoring.coreos.com_v1_servicemonitor.yaml @@ -11,14 +11,21 @@ metadata: name: loki-operator-metrics-monitor spec: endpoints: - - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + - authorization: + credentials: + key: token + name: loki-operator-controller-manager-metrics-token + type: bearer interval: 30s path: /metrics scheme: https scrapeTimeout: 10s targetPort: 8443 tlsConfig: - caFile: /etc/prometheus/configmaps/serving-certs-ca-bundle/service-ca.crt + ca: + secret: + key: service-ca.crt + name: 
loki-operator-controller-manager-metrics-token serverName: loki-operator-controller-manager-metrics-service.openshift-operators-redhat.svc selector: matchLabels: diff --git a/operator/bundle/openshift/manifests/loki-operator.clusterserviceversion.yaml b/operator/bundle/openshift/manifests/loki-operator.clusterserviceversion.yaml index ee2dcb513fe3b..aa3871373e30d 100644 --- a/operator/bundle/openshift/manifests/loki-operator.clusterserviceversion.yaml +++ b/operator/bundle/openshift/manifests/loki-operator.clusterserviceversion.yaml @@ -1663,7 +1663,7 @@ spec: - subjectaccessreviews verbs: - create - serviceAccountName: default + serviceAccountName: loki-operator-controller-manager deployments: - label: app.kubernetes.io/instance: loki-operator-0.1.0 @@ -1764,6 +1764,7 @@ spec: runAsNonRoot: true seccompProfile: type: RuntimeDefault + serviceAccountName: loki-operator-controller-manager terminationGracePeriodSeconds: 10 volumes: - configMap: @@ -1797,7 +1798,7 @@ spec: verbs: - create - patch - serviceAccountName: default + serviceAccountName: loki-operator-controller-manager strategy: deployment installModes: - supported: false diff --git a/operator/config/manager/manager.yaml b/operator/config/manager/manager.yaml index 3b617b00b1c41..fe6a940c38857 100644 --- a/operator/config/manager/manager.yaml +++ b/operator/config/manager/manager.yaml @@ -39,4 +39,5 @@ spec: periodSeconds: 10 nodeSelector: kubernetes.io/os: linux + serviceAccountName: controller-manager terminationGracePeriodSeconds: 10 diff --git a/operator/config/overlays/community-openshift/prometheus_service_monitor_patch.yaml b/operator/config/overlays/community-openshift/prometheus_service_monitor_patch.yaml index 82f75710f84f6..7ece571465a0c 100644 --- a/operator/config/overlays/community-openshift/prometheus_service_monitor_patch.yaml +++ b/operator/config/overlays/community-openshift/prometheus_service_monitor_patch.yaml @@ -6,12 +6,19 @@ metadata: name: metrics-monitor spec: endpoints: - - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token - path: /metrics + - path: /metrics targetPort: 8443 scheme: https interval: 30s scrapeTimeout: 10s + authorization: + type: bearer + credentials: + key: token + name: loki-operator-controller-manager-metrics-token tlsConfig: - caFile: /etc/prometheus/configmaps/serving-certs-ca-bundle/service-ca.crt + ca: + secret: + key: service-ca.crt + name: loki-operator-controller-manager-metrics-token serverName: loki-operator-controller-manager-metrics-service.kubernetes-operators.svc diff --git a/operator/config/overlays/openshift/kustomization.yaml b/operator/config/overlays/openshift/kustomization.yaml index 9a74cbddba342..cdd65f1cbeafc 100644 --- a/operator/config/overlays/openshift/kustomization.yaml +++ b/operator/config/overlays/openshift/kustomization.yaml @@ -4,6 +4,7 @@ resources: - ../../manager - ../../webhook - ../../prometheus +- manager_metrics_secret_token.yaml # Adds namespace to all resources. 
namespace: openshift-operators-redhat diff --git a/operator/config/overlays/openshift/manager_metrics_secret_token.yaml b/operator/config/overlays/openshift/manager_metrics_secret_token.yaml new file mode 100644 index 0000000000000..b4847d3a3e488 --- /dev/null +++ b/operator/config/overlays/openshift/manager_metrics_secret_token.yaml @@ -0,0 +1,7 @@ +apiVersion: v1 +kind: Secret +metadata: + name: controller-manager-metrics-token + annotations: + kubernetes.io/service-account.name: loki-operator-controller-manager-metrics-reader +type: kubernetes.io/service-account-token diff --git a/operator/config/overlays/openshift/prometheus_service_monitor_patch.yaml b/operator/config/overlays/openshift/prometheus_service_monitor_patch.yaml index 35c522749b6dc..35d6b0362cd7c 100644 --- a/operator/config/overlays/openshift/prometheus_service_monitor_patch.yaml +++ b/operator/config/overlays/openshift/prometheus_service_monitor_patch.yaml @@ -6,12 +6,19 @@ metadata: name: metrics-monitor spec: endpoints: - - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token - path: /metrics + - path: /metrics targetPort: 8443 scheme: https interval: 30s scrapeTimeout: 10s + authorization: + type: bearer + credentials: + key: token + name: loki-operator-controller-manager-metrics-token tlsConfig: - caFile: /etc/prometheus/configmaps/serving-certs-ca-bundle/service-ca.crt + ca: + secret: + key: service-ca.crt + name: loki-operator-controller-manager-metrics-token serverName: loki-operator-controller-manager-metrics-service.openshift-operators-redhat.svc diff --git a/operator/config/rbac/auth_proxy_client_clusterrolebinding.yaml b/operator/config/rbac/auth_proxy_client_clusterrolebinding.yaml new file mode 100644 index 0000000000000..7228087e23edb --- /dev/null +++ b/operator/config/rbac/auth_proxy_client_clusterrolebinding.yaml @@ -0,0 +1,12 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: controller-manager-read-metrics +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: metrics-reader +subjects: +- kind: ServiceAccount + name: controller-manager-metrics-reader + namespace: system diff --git a/operator/config/rbac/auth_proxy_client_serviceaccount.yaml b/operator/config/rbac/auth_proxy_client_serviceaccount.yaml new file mode 100644 index 0000000000000..041ac56b630cf --- /dev/null +++ b/operator/config/rbac/auth_proxy_client_serviceaccount.yaml @@ -0,0 +1,5 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: controller-manager-metrics-reader + namespace: system diff --git a/operator/config/rbac/auth_proxy_role_binding.yaml b/operator/config/rbac/auth_proxy_role_binding.yaml index 48ed1e4b85c42..ec7acc0a1b79c 100644 --- a/operator/config/rbac/auth_proxy_role_binding.yaml +++ b/operator/config/rbac/auth_proxy_role_binding.yaml @@ -8,5 +8,5 @@ roleRef: name: proxy-role subjects: - kind: ServiceAccount - name: default + name: controller-manager namespace: system diff --git a/operator/config/rbac/kustomization.yaml b/operator/config/rbac/kustomization.yaml index b48c43c1690bf..c8b43e2fe40f6 100644 --- a/operator/config/rbac/kustomization.yaml +++ b/operator/config/rbac/kustomization.yaml @@ -7,5 +7,8 @@ resources: - auth_proxy_role.yaml - auth_proxy_role_binding.yaml - auth_proxy_client_clusterrole.yaml +- auth_proxy_client_clusterrolebinding.yaml +- auth_proxy_client_serviceaccount.yaml - prometheus_role.yaml - prometheus_role_binding.yaml +- serviceaccount.yaml diff --git a/operator/config/rbac/leader_election_role_binding.yaml 
b/operator/config/rbac/leader_election_role_binding.yaml index eed16906f4dcb..1d1321ed4f020 100644 --- a/operator/config/rbac/leader_election_role_binding.yaml +++ b/operator/config/rbac/leader_election_role_binding.yaml @@ -8,5 +8,5 @@ roleRef: name: leader-election-role subjects: - kind: ServiceAccount - name: default + name: controller-manager namespace: system diff --git a/operator/config/rbac/role_binding.yaml b/operator/config/rbac/role_binding.yaml index e97e9b5e1e83b..93d27e99a43e8 100644 --- a/operator/config/rbac/role_binding.yaml +++ b/operator/config/rbac/role_binding.yaml @@ -8,5 +8,5 @@ roleRef: name: lokistack-manager subjects: - kind: ServiceAccount - name: default + name: controller-manager namespace: system diff --git a/operator/config/rbac/serviceaccount.yaml b/operator/config/rbac/serviceaccount.yaml new file mode 100644 index 0000000000000..7cd6025bfc4af --- /dev/null +++ b/operator/config/rbac/serviceaccount.yaml @@ -0,0 +1,5 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: controller-manager + namespace: system From 63c88489d009929f704dabc9fdcd18662214a174 Mon Sep 17 00:00:00 2001 From: Salva Corts Date: Fri, 15 Mar 2024 09:10:16 +0100 Subject: [PATCH 16/25] refactor: Add RF and Tokens to Loki ring Cfg and allow overwriting docs. (#12142) Co-authored-by: Owen Diehl --- docs/sources/configure/_index.md | 54 ++++++++++-------- pkg/bloomcompactor/bloomcompactor_test.go | 16 +++--- pkg/bloomcompactor/config.go | 33 ++++++----- pkg/bloomgateway/bloomgateway_test.go | 28 ++++------ pkg/bloomgateway/config.go | 19 +++---- pkg/compactor/compactor.go | 17 +++++- pkg/loki/config_wrapper.go | 5 +- pkg/loki/config_wrapper_test.go | 2 +- pkg/loki/loki.go | 6 ++ pkg/loki/modules.go | 10 ++-- pkg/scheduler/scheduler.go | 27 ++++++++- .../indexshipper/indexgateway/config.go | 50 +++++++++-------- pkg/util/flagext/flagsetskip.go | 55 +++++++++++++++++++ pkg/util/ring/ring_config.go | 26 +++------ tools/doc-generator/writer.go | 15 ++++- 15 files changed, 235 insertions(+), 128 deletions(-) create mode 100644 pkg/util/flagext/flagsetskip.go diff --git a/docs/sources/configure/_index.md b/docs/sources/configure/_index.md index 18fcc83bb5929..c327e919f059f 100644 --- a/docs/sources/configure/_index.md +++ b/docs/sources/configure/_index.md @@ -1763,6 +1763,12 @@ ring: # CLI flag: -index-gateway.ring.zone-awareness-enabled [zone_awareness_enabled: | default = false] + # Deprecated: How many index gateway instances are assigned to each tenant. + # Use -index-gateway.shard-size instead. The shard size is also a per-tenant + # setting. + # CLI flag: -replication-factor + [replication_factor: | default = 3] + # Instance ID to register in the ring. # CLI flag: -index-gateway.ring.instance-id [instance_id: | default = ""] @@ -1787,12 +1793,6 @@ ring: # Enable using a IPv6 instance address. # CLI flag: -index-gateway.ring.instance-enable-ipv6 [instance_enable_ipv6: | default = false] - - # Deprecated: How many index gateway instances are assigned to each tenant. - # Use -index-gateway.shard-size instead. The shard size is also a per-tenant - # setting. - # CLI flag: -replication-factor - [replication_factor: | default = 3] ``` ### bloom_gateway @@ -1860,6 +1860,17 @@ ring: # CLI flag: -bloom-gateway.ring.zone-awareness-enabled [zone_awareness_enabled: | default = false] + # Number of tokens to use in the ring. The bigger the number of tokens, the + # more fingerprint ranges the compactor will own, but the smaller these ranges + # will be. 
Bigger number of tokens means that more but smaller requests will + # be handled by each gateway. + # CLI flag: -bloom-gateway.ring.tokens + [num_tokens: | default = 16] + + # Factor for data replication. + # CLI flag: -bloom-gateway.ring.replication-factor + [replication_factor: | default = 3] + # Instance ID to register in the ring. # CLI flag: -bloom-gateway.ring.instance-id [instance_id: | default = ""] @@ -1885,17 +1896,6 @@ ring: # CLI flag: -bloom-gateway.ring.instance-enable-ipv6 [instance_enable_ipv6: | default = false] - # Factor for data replication. - # CLI flag: -bloom-gateway.replication-factor - [replication_factor: | default = 3] - - # Number of tokens to use in the ring. The bigger the number of tokens, the - # more fingerprint ranges the compactor will own, but the smaller these ranges - # will be. Bigger number of tokens means that more but smaller requests will - # be handled by each gateway. - # CLI flag: -bloom-gateway.ring.tokens - [tokens: | default = 16] - # Flag to enable or disable the bloom gateway component globally. # CLI flag: -bloom-gateway.enabled [enabled: | default = false] @@ -2654,6 +2654,11 @@ ring: # CLI flag: -bloom-compactor.ring.zone-awareness-enabled [zone_awareness_enabled: | default = false] + # Number of tokens to use in the ring per compactor. Higher number of tokens + # will result in more and smaller files (metas and blocks.) + # CLI flag: -bloom-compactor.ring.num-tokens + [num_tokens: | default = 10] + # Instance ID to register in the ring. # CLI flag: -bloom-compactor.ring.instance-id [instance_id: | default = ""] @@ -2679,13 +2684,6 @@ ring: # CLI flag: -bloom-compactor.ring.instance-enable-ipv6 [instance_enable_ipv6: | default = false] - # Number of tokens to use in the ring. The bigger the number of tokens, the - # more fingerprint ranges the compactor will own, but the smaller these ranges - # will be. Bigger number of tokens will result in more and smaller metas and - # blocks. - # CLI flag: -bloom-compactor.ring.tokens - [tokens: | default = 10] - # Flag to enable or disable the usage of the bloom-compactor component. # CLI flag: -bloom-compactor.enabled [enabled: | default = false] @@ -3836,6 +3834,14 @@ ring: # CLI flag: -common.storage.ring.zone-awareness-enabled [zone_awareness_enabled: | default = false] + # Number of tokens to own in the ring. + # CLI flag: -common.storage.ring.num-tokens + [num_tokens: | default = 128] + + # Factor for data replication. + # CLI flag: -common.storage.ring.replication-factor + [replication_factor: | default = 3] + # Instance ID to register in the ring. 
# CLI flag: -common.storage.ring.instance-id [instance_id: | default = ""] diff --git a/pkg/bloomcompactor/bloomcompactor_test.go b/pkg/bloomcompactor/bloomcompactor_test.go index 71d5b843ca04b..70e76d41e9856 100644 --- a/pkg/bloomcompactor/bloomcompactor_test.go +++ b/pkg/bloomcompactor/bloomcompactor_test.go @@ -68,22 +68,20 @@ func TestCompactor_ownsTenant(t *testing.T) { var ringManagers []*lokiring.RingManager var compactors []*Compactor for i := 0; i < tc.compactors; i++ { - var ringCfg RingConfig - ringCfg.RegisterFlagsWithPrefix("", "", flag.NewFlagSet("ring", flag.PanicOnError)) - ringCfg.KVStore.Store = "inmemory" - ringCfg.InstanceID = fmt.Sprintf("bloom-compactor-%d", i) - ringCfg.InstanceAddr = fmt.Sprintf("localhost-%d", i) + var cfg Config + cfg.RegisterFlags(flag.NewFlagSet("ring", flag.PanicOnError)) + cfg.Ring.KVStore.Store = "inmemory" + cfg.Ring.InstanceID = fmt.Sprintf("bloom-compactor-%d", i) + cfg.Ring.InstanceAddr = fmt.Sprintf("localhost-%d", i) - ringManager, err := lokiring.NewRingManager("bloom-compactor", lokiring.ServerMode, ringCfg.RingConfig, 1, ringCfg.Tokens, util_log.Logger, prometheus.NewRegistry()) + ringManager, err := lokiring.NewRingManager("bloom-compactor", lokiring.ServerMode, cfg.Ring, 1, cfg.Ring.NumTokens, util_log.Logger, prometheus.NewRegistry()) require.NoError(t, err) require.NoError(t, ringManager.StartAsync(context.Background())) shuffleSharding := util_ring.NewTenantShuffleSharding(ringManager.Ring, ringManager.RingLifecycler, tc.limits.BloomCompactorShardSize) compactor := &Compactor{ - cfg: Config{ - Ring: ringCfg, - }, + cfg: cfg, sharding: shuffleSharding, limits: tc.limits, } diff --git a/pkg/bloomcompactor/config.go b/pkg/bloomcompactor/config.go index b887493c1a867..fee457767647b 100644 --- a/pkg/bloomcompactor/config.go +++ b/pkg/bloomcompactor/config.go @@ -3,18 +3,23 @@ package bloomcompactor import ( "flag" "fmt" + "github.com/pkg/errors" "time" "github.com/grafana/loki/pkg/storage/stores/shipper/indexshipper/downloads" "github.com/grafana/loki/pkg/util/ring" ) +const ( + ringReplicationFactor = 1 +) + // Config configures the bloom-compactor component. type Config struct { // Ring configures the ring store used to save and retrieve the different Bloom-Compactor instances. // In case it isn't explicitly set, it follows the same behavior of the other rings (ex: using the common configuration // section and the ingester configuration by default). - Ring RingConfig `yaml:"ring,omitempty" doc:"description=Defines the ring to be used by the bloom-compactor servers. In case this isn't configured, this block supports inheriting configuration from the common ring section."` + Ring ring.RingConfig `yaml:"ring,omitempty" doc:"description=Defines the ring to be used by the bloom-compactor servers. In case this isn't configured, this block supports inheriting configuration from the common ring section."` // Enabled configures whether bloom-compactors should be used to compact index values into bloomfilters Enabled bool `yaml:"enabled"` CompactionInterval time.Duration `yaml:"compaction_interval"` @@ -30,7 +35,6 @@ type Config struct { // RegisterFlags registers flags for the Bloom-Compactor configuration. 
func (cfg *Config) RegisterFlags(f *flag.FlagSet) { - cfg.Ring.RegisterFlagsWithPrefix("bloom-compactor.", "collectors/", f) f.BoolVar(&cfg.Enabled, "bloom-compactor.enabled", false, "Flag to enable or disable the usage of the bloom-compactor component.") f.DurationVar(&cfg.CompactionInterval, "bloom-compactor.compaction-interval", 10*time.Minute, "Interval at which to re-run the compaction operation.") f.IntVar(&cfg.WorkerParallelism, "bloom-compactor.worker-parallelism", 1, "Number of workers to run in parallel for compaction.") @@ -48,26 +52,29 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) { f.DurationVar(&cfg.RetryMaxBackoff, "bloom-compactor.compaction-retries-max-backoff", time.Minute, "Maximum backoff time between retries.") f.IntVar(&cfg.CompactionRetries, "bloom-compactor.compaction-retries", 3, "Number of retries to perform when compaction fails.") f.IntVar(&cfg.MaxCompactionParallelism, "bloom-compactor.max-compaction-parallelism", 1, "Maximum number of tables to compact in parallel. While increasing this value, please make sure compactor has enough disk space allocated to be able to store and compact as many tables.") + + // Ring + skipFlags := []string{ + "bloom-compactor.ring.num-tokens", + "bloom-compactor.ring.replication-factor", + } + cfg.Ring.RegisterFlagsWithPrefix("bloom-compactor.", "collectors/", f, skipFlags...) + // Overrides + f.IntVar(&cfg.Ring.NumTokens, "bloom-compactor.ring.num-tokens", 10, "Number of tokens to use in the ring per compactor. Higher number of tokens will result in more and smaller files (metas and blocks.)") + // Ignored + f.IntVar(&cfg.Ring.ReplicationFactor, "bloom-compactor.ring.replication-factor", ringReplicationFactor, fmt.Sprintf("IGNORED: Replication factor is fixed to %d", ringReplicationFactor)) } func (cfg *Config) Validate() error { if cfg.MinTableCompactionPeriod > cfg.MaxTableCompactionPeriod { return fmt.Errorf("min_compaction_age must be less than or equal to max_compaction_age") } + if cfg.Ring.ReplicationFactor != ringReplicationFactor { + return errors.New("Replication factor must not be changed as it will not take effect") + } return nil } -type RingConfig struct { - ring.RingConfig `yaml:",inline"` - - Tokens int `yaml:"tokens"` -} - -func (cfg *RingConfig) RegisterFlagsWithPrefix(flagsPrefix, storePrefix string, f *flag.FlagSet) { - cfg.RingConfig.RegisterFlagsWithPrefix(flagsPrefix, storePrefix, f) - f.IntVar(&cfg.Tokens, flagsPrefix+"ring.tokens", 10, "Number of tokens to use in the ring. The bigger the number of tokens, the more fingerprint ranges the compactor will own, but the smaller these ranges will be. 
Bigger number of tokens will result in more and smaller metas and blocks.") -} - type Limits interface { downloads.Limits BloomCompactorShardSize(tenantID string) int diff --git a/pkg/bloomgateway/bloomgateway_test.go b/pkg/bloomgateway/bloomgateway_test.go index 54651596f9b22..2318fdb535d05 100644 --- a/pkg/bloomgateway/bloomgateway_test.go +++ b/pkg/bloomgateway/bloomgateway_test.go @@ -102,16 +102,12 @@ func TestBloomGateway_StartStopService(t *testing.T) { cfg := Config{ Enabled: true, - Ring: RingConfig{ - RingConfigWithRF: lokiring.RingConfigWithRF{ - RingConfig: lokiring.RingConfig{ - KVStore: kv.Config{ - Mock: kvStore, - }, - }, - ReplicationFactor: 1, + Ring: lokiring.RingConfig{ + KVStore: kv.Config{ + Mock: kvStore, }, - Tokens: 16, + ReplicationFactor: 1, + NumTokens: 16, }, WorkerConcurrency: 4, MaxOutstandingPerTenant: 1024, @@ -147,16 +143,12 @@ func TestBloomGateway_FilterChunkRefs(t *testing.T) { cfg := Config{ Enabled: true, - Ring: RingConfig{ - RingConfigWithRF: lokiring.RingConfigWithRF{ - RingConfig: lokiring.RingConfig{ - KVStore: kv.Config{ - Mock: kvStore, - }, - }, - ReplicationFactor: 1, + Ring: lokiring.RingConfig{ + KVStore: kv.Config{ + Mock: kvStore, }, - Tokens: 16, + ReplicationFactor: 1, + NumTokens: 16, }, WorkerConcurrency: 4, MaxOutstandingPerTenant: 1024, diff --git a/pkg/bloomgateway/config.go b/pkg/bloomgateway/config.go index 42c476e00d41a..ad5d2928728a6 100644 --- a/pkg/bloomgateway/config.go +++ b/pkg/bloomgateway/config.go @@ -11,7 +11,7 @@ type Config struct { // Ring configures the ring store used to save and retrieve the different Bloom Gateway instances. // In case it isn't explicitly set, it follows the same behavior of the other rings (ex: using the common configuration // section and the ingester configuration by default). - Ring RingConfig `yaml:"ring,omitempty" doc:"description=Defines the ring to be used by the bloom gateway servers and clients. In case this isn't configured, this block supports inheriting configuration from the common ring section."` + Ring ring.RingConfig `yaml:"ring,omitempty" doc:"description=Defines the ring to be used by the bloom gateway servers and clients. In case this isn't configured, this block supports inheriting configuration from the common ring section."` // Enabled is the global switch to configures whether Bloom Gateways should be used to filter chunks. Enabled bool `yaml:"enabled"` // Client configures the Bloom Gateway client @@ -29,7 +29,6 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) { // RegisterFlagsWithPrefix registers flags for the Bloom Gateway configuration with a common prefix. 
func (cfg *Config) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) { - cfg.Ring.RegisterFlagsWithPrefix(prefix, "collectors/", f) f.BoolVar(&cfg.Enabled, prefix+"enabled", false, "Flag to enable or disable the bloom gateway component globally.") f.IntVar(&cfg.WorkerConcurrency, prefix+"worker-concurrency", 4, "Number of workers to use for filtering chunks concurrently.") f.IntVar(&cfg.MaxOutstandingPerTenant, prefix+"max-outstanding-per-tenant", 1024, "Maximum number of outstanding tasks per tenant.") @@ -37,17 +36,13 @@ func (cfg *Config) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) { // TODO(chaudum): Figure out what the better place is for registering flags // -bloom-gateway.client.* or -bloom-gateway-client.* cfg.Client.RegisterFlags(f) -} - -type RingConfig struct { - ring.RingConfigWithRF `yaml:",inline"` - - Tokens int `yaml:"tokens"` -} -func (cfg *RingConfig) RegisterFlagsWithPrefix(flagsPrefix, storePrefix string, f *flag.FlagSet) { - cfg.RingConfigWithRF.RegisterFlagsWithPrefix(flagsPrefix, storePrefix, f) - f.IntVar(&cfg.Tokens, flagsPrefix+"ring.tokens", 16, "Number of tokens to use in the ring. The bigger the number of tokens, the more fingerprint ranges the compactor will own, but the smaller these ranges will be. Bigger number of tokens means that more but smaller requests will be handled by each gateway.") + // Ring + skipFlags := []string{ + prefix + "ring.tokens", + } + cfg.Ring.RegisterFlagsWithPrefix(prefix, "collectors/", f, skipFlags...) + f.IntVar(&cfg.Ring.NumTokens, prefix+"ring.tokens", 16, "Number of tokens to use in the ring. The bigger the number of tokens, the more fingerprint ranges the compactor will own, but the smaller these ranges will be. Bigger number of tokens means that more but smaller requests will be handled by each gateway.") } type Limits interface { diff --git a/pkg/compactor/compactor.go b/pkg/compactor/compactor.go index f5062f2d6e33e..75bd575e2c77c 100644 --- a/pkg/compactor/compactor.go +++ b/pkg/compactor/compactor.go @@ -110,7 +110,14 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) { f.IntVar(&cfg.TablesToCompact, "compactor.tables-to-compact", 0, "Number of tables that compactor will try to compact. Newer tables are chosen when this is less than the number of tables available.") f.IntVar(&cfg.SkipLatestNTables, "compactor.skip-latest-n-tables", 0, "Do not compact N latest tables. Together with -compactor.run-once and -compactor.tables-to-compact, this is useful when clearing compactor backlogs.") - cfg.CompactorRing.RegisterFlagsWithPrefix("compactor.", "collectors/", f) + // Ring + skipFlags := []string{ + "compactor.ring.num-tokens", + "compactor.ring.replication-factor", + } + cfg.CompactorRing.RegisterFlagsWithPrefix("compactor.", "collectors/", f, skipFlags...) 
+ f.IntVar(&cfg.CompactorRing.NumTokens, "compactor.ring.num-tokens", ringNumTokens, fmt.Sprintf("IGNORED: Num tokens is fixed to %d", ringNumTokens)) + f.IntVar(&cfg.CompactorRing.ReplicationFactor, "compactor.ring.replication-factor", ringReplicationFactor, fmt.Sprintf("IGNORED: Replication factor is fixed to %d", ringReplicationFactor)) } // Validate verifies the config does not contain inappropriate values @@ -119,6 +126,14 @@ func (cfg *Config) Validate() error { return errors.New("max compaction parallelism must be >= 1") } + if cfg.CompactorRing.NumTokens != ringNumTokens { + return errors.New("Num tokens must not be changed as it will not take effect") + } + + if cfg.CompactorRing.ReplicationFactor != ringReplicationFactor { + return errors.New("Replication factor must not be changed as it will not take effect") + } + if cfg.RetentionEnabled { if cfg.DeleteRequestStore == "" { return fmt.Errorf("compactor.delete-request-store should be configured when retention is enabled") diff --git a/pkg/loki/config_wrapper.go b/pkg/loki/config_wrapper.go index f76e0f75da9f7..8a5f6c6811250 100644 --- a/pkg/loki/config_wrapper.go +++ b/pkg/loki/config_wrapper.go @@ -178,10 +178,11 @@ func applyInstanceConfigs(r, defaults *ConfigWrapper) { } } -// applyCommonReplicationFactor apply the common replication factor to the Index Gateway ring. +// applyCommonReplicationFactor apply the common replication factor to the Index Gateway and Bloom Gateway rings. func applyCommonReplicationFactor(r, defaults *ConfigWrapper) { if !reflect.DeepEqual(r.Common.ReplicationFactor, defaults.Common.ReplicationFactor) { r.IndexGateway.Ring.ReplicationFactor = r.Common.ReplicationFactor + r.BloomGateway.Ring.ReplicationFactor = r.Common.ReplicationFactor } } @@ -314,6 +315,7 @@ func applyConfigToRings(r, defaults *ConfigWrapper, rc lokiring.RingConfig, merg r.BloomCompactor.Ring.InstanceZone = rc.InstanceZone r.BloomCompactor.Ring.ZoneAwarenessEnabled = rc.ZoneAwarenessEnabled r.BloomCompactor.Ring.KVStore = rc.KVStore + r.BloomCompactor.Ring.NumTokens = rc.NumTokens } // BloomGateway @@ -327,6 +329,7 @@ func applyConfigToRings(r, defaults *ConfigWrapper, rc lokiring.RingConfig, merg r.BloomGateway.Ring.InstanceZone = rc.InstanceZone r.BloomGateway.Ring.ZoneAwarenessEnabled = rc.ZoneAwarenessEnabled r.BloomGateway.Ring.KVStore = rc.KVStore + r.BloomGateway.Ring.NumTokens = rc.NumTokens } } diff --git a/pkg/loki/config_wrapper_test.go b/pkg/loki/config_wrapper_test.go index 60c9223732d05..41705f012f020 100644 --- a/pkg/loki/config_wrapper_test.go +++ b/pkg/loki/config_wrapper_test.go @@ -1174,7 +1174,7 @@ func Test_applyIngesterRingConfig(t *testing.T) { assert.Equal(t, 9, reflect.TypeOf(distributor.RingConfig{}).NumField(), fmt.Sprintf(msgf, reflect.TypeOf(distributor.RingConfig{}).String())) - assert.Equal(t, 13, + assert.Equal(t, 15, reflect.TypeOf(lokiring.RingConfig{}).NumField(), fmt.Sprintf(msgf, reflect.TypeOf(lokiring.RingConfig{}).String())) }) diff --git a/pkg/loki/loki.go b/pkg/loki/loki.go index dac605dacb7d4..f44c079e51dff 100644 --- a/pkg/loki/loki.go +++ b/pkg/loki/loki.go @@ -223,6 +223,9 @@ func (c *Config) Validate() error { if err := c.Querier.Validate(); err != nil { return errors.Wrap(err, "invalid querier config") } + if err := c.QueryScheduler.Validate(); err != nil { + return errors.Wrap(err, "invalid query_scheduler config") + } if err := c.TableManager.Validate(); err != nil { return errors.Wrap(err, "invalid tablemanager config") } @@ -241,6 +244,9 @@ func (c *Config) Validate() error { if err 
:= c.StorageConfig.BoltDBShipperConfig.Validate(); err != nil { return errors.Wrap(err, "invalid boltdb-shipper config") } + if err := c.IndexGateway.Validate(); err != nil { + return errors.Wrap(err, "invalid index_gateway config") + } if err := c.CompactorConfig.Validate(); err != nil { return errors.Wrap(err, "invalid compactor config") } diff --git a/pkg/loki/modules.go b/pkg/loki/modules.go index e5ca22dd65344..0761331f6b689 100644 --- a/pkg/loki/modules.go +++ b/pkg/loki/modules.go @@ -1345,7 +1345,7 @@ func (t *Loki) initBloomGatewayRing() (services.Service, error) { if t.Cfg.isModuleEnabled(BloomGateway) || t.Cfg.isModuleEnabled(Backend) || legacyReadMode { mode = lokiring.ServerMode } - manager, err := lokiring.NewRingManager(bloomGatewayRingKey, mode, t.Cfg.BloomGateway.Ring.RingConfig, t.Cfg.BloomGateway.Ring.ReplicationFactor, t.Cfg.BloomGateway.Ring.Tokens, util_log.Logger, prometheus.DefaultRegisterer) + manager, err := lokiring.NewRingManager(bloomGatewayRingKey, mode, t.Cfg.BloomGateway.Ring, t.Cfg.BloomGateway.Ring.ReplicationFactor, t.Cfg.BloomGateway.Ring.NumTokens, util_log.Logger, prometheus.DefaultRegisterer) if err != nil { return nil, gerrors.Wrap(err, "error initializing bloom gateway ring manager") } @@ -1442,7 +1442,7 @@ func (t *Loki) initIndexGatewayRing() (_ services.Service, err error) { if t.Cfg.isModuleEnabled(IndexGateway) || legacyReadMode || t.Cfg.isModuleEnabled(Backend) { managerMode = lokiring.ServerMode } - rm, err := lokiring.NewRingManager(indexGatewayRingKey, managerMode, t.Cfg.IndexGateway.Ring.RingConfig, t.Cfg.IndexGateway.Ring.ReplicationFactor, 128, util_log.Logger, prometheus.DefaultRegisterer) + rm, err := lokiring.NewRingManager(indexGatewayRingKey, managerMode, t.Cfg.IndexGateway.Ring, t.Cfg.IndexGateway.Ring.ReplicationFactor, indexgateway.NumTokens, util_log.Logger, prometheus.DefaultRegisterer) if err != nil { return nil, gerrors.Wrap(err, "new index gateway ring manager") @@ -1493,7 +1493,7 @@ func (t *Loki) initBloomCompactorRing() (services.Service, error) { // is LegacyMode needed? // legacyReadMode := t.Cfg.LegacyReadTarget && t.isModuleActive(Read) - rm, err := lokiring.NewRingManager(bloomCompactorRingKey, lokiring.ServerMode, t.Cfg.BloomCompactor.Ring.RingConfig, 1, t.Cfg.BloomCompactor.Ring.Tokens, util_log.Logger, prometheus.DefaultRegisterer) + rm, err := lokiring.NewRingManager(bloomCompactorRingKey, lokiring.ServerMode, t.Cfg.BloomCompactor.Ring, 1, t.Cfg.BloomCompactor.Ring.NumTokens, util_log.Logger, prometheus.DefaultRegisterer) if err != nil { return nil, gerrors.Wrap(err, "error initializing bloom-compactor ring manager") } @@ -1534,9 +1534,7 @@ func (t *Loki) initQuerySchedulerRing() (_ services.Service, err error) { if t.Cfg.isModuleEnabled(QueryScheduler) || t.Cfg.isModuleEnabled(Backend) || t.Cfg.isModuleEnabled(All) || (t.Cfg.LegacyReadTarget && t.Cfg.isModuleEnabled(Read)) { managerMode = lokiring.ServerMode } - rf := 2 // ringReplicationFactor should be 2 because we want 2 schedulers. - tokens := 1 // we only need to insert 1 token to be used for leader election purposes. 
- rm, err := lokiring.NewRingManager(schedulerRingKey, managerMode, t.Cfg.QueryScheduler.SchedulerRing, rf, tokens, util_log.Logger, prometheus.DefaultRegisterer) + rm, err := lokiring.NewRingManager(schedulerRingKey, managerMode, t.Cfg.QueryScheduler.SchedulerRing, scheduler.ReplicationFactor, scheduler.NumTokens, util_log.Logger, prometheus.DefaultRegisterer) if err != nil { return nil, gerrors.Wrap(err, "new scheduler ring manager") diff --git a/pkg/scheduler/scheduler.go b/pkg/scheduler/scheduler.go index 5cd163ff0ffa1..4c26becce7a63 100644 --- a/pkg/scheduler/scheduler.go +++ b/pkg/scheduler/scheduler.go @@ -39,6 +39,13 @@ import ( lokiring "github.com/grafana/loki/pkg/util/ring" ) +const ( + // NumTokens is 1 since we only need to insert 1 token to be used for leader election purposes. + NumTokens = 1 + // ReplicationFactor should be 2 because we want 2 schedulers. + ReplicationFactor = 2 +) + var errSchedulerIsNotRunning = errors.New("scheduler is not running") // Scheduler is responsible for queueing and dispatching queries to Queriers. @@ -111,7 +118,25 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) { f.DurationVar(&cfg.QuerierForgetDelay, "query-scheduler.querier-forget-delay", 0, "If a querier disconnects without sending notification about graceful shutdown, the query-scheduler will keep the querier in the tenant's shard until the forget delay has passed. This feature is useful to reduce the blast radius when shuffle-sharding is enabled.") cfg.GRPCClientConfig.RegisterFlagsWithPrefix("query-scheduler.grpc-client-config", f) f.BoolVar(&cfg.UseSchedulerRing, "query-scheduler.use-scheduler-ring", false, "Set to true to have the query schedulers create and place themselves in a ring. If no frontend_address or scheduler_address are present anywhere else in the configuration, Loki will toggle this value to true.") - cfg.SchedulerRing.RegisterFlagsWithPrefix("query-scheduler.", "collectors/", f) + + // Ring + skipFlags := []string{ + "query-scheduler.ring.num-tokens", + "query-scheduler.ring.replication-factor", + } + cfg.SchedulerRing.RegisterFlagsWithPrefix("query-scheduler.", "collectors/", f, skipFlags...) + f.IntVar(&cfg.SchedulerRing.NumTokens, "query-scheduler.ring.num-tokens", NumTokens, fmt.Sprintf("IGNORED: Num tokens is fixed to %d", NumTokens)) + f.IntVar(&cfg.SchedulerRing.ReplicationFactor, "query-scheduler.ring.replication-factor", ReplicationFactor, fmt.Sprintf("IGNORED: Replication factor is fixed to %d", ReplicationFactor)) +} + +func (cfg *Config) Validate() error { + if cfg.SchedulerRing.NumTokens != NumTokens { + return errors.New("Num tokens must not be changed as it will not take effect") + } + if cfg.SchedulerRing.ReplicationFactor != ReplicationFactor { + return errors.New("Replication factor must not be changed as it will not take effect") + } + return nil } // NewScheduler creates a new Scheduler. diff --git a/pkg/storage/stores/shipper/indexshipper/indexgateway/config.go b/pkg/storage/stores/shipper/indexshipper/indexgateway/config.go index 137ef5dfa05be..884d29bf9e37c 100644 --- a/pkg/storage/stores/shipper/indexshipper/indexgateway/config.go +++ b/pkg/storage/stores/shipper/indexshipper/indexgateway/config.go @@ -3,10 +3,16 @@ package indexgateway import ( "flag" "fmt" + "github.com/pkg/errors" "github.com/grafana/loki/pkg/util/ring" ) +const ( + NumTokens = 128 + ReplicationFactor = 3 +) + // Mode represents in which mode an Index Gateway instance is running. // // Right now, two modes are supported: simple mode (default) and ring mode. 
@@ -47,26 +53,6 @@ const ( RingMode Mode = "ring" ) -// RingCfg is identical to ring.RingConfigWithRF with the difference that the -// ReplicationFactor field is deprecated. -type RingCfg struct { - // InternalRingCfg configures the Index Gateway ring. - ring.RingConfig `yaml:",inline"` - - // ReplicationFactor defines how many Index Gateway instances are assigned to each tenant. - // - // Whenever the store queries the ring key-value store for the Index Gateway instance responsible for tenant X, - // multiple Index Gateway instances are expected to be returned as Index Gateway might be busy/locked for specific - // reasons (this is assured by the spikey behavior of Index Gateway latencies). - ReplicationFactor int `yaml:"replication_factor"` -} - -// RegisterFlagsWithPrefix register all Index Gateway flags related to its ring but with a proper store prefix to avoid conflicts. -func (cfg *RingCfg) RegisterFlags(prefix, storePrefix string, f *flag.FlagSet) { - cfg.RegisterFlagsWithPrefix(prefix, storePrefix, f) - f.IntVar(&cfg.ReplicationFactor, "replication-factor", 3, "Deprecated: How many index gateway instances are assigned to each tenant. Use -index-gateway.shard-size instead. The shard size is also a per-tenant setting.") -} - // Config configures an Index Gateway server. type Config struct { // Mode configures in which mode the client will be running when querying and communicating with an Index Gateway instance. @@ -76,11 +62,31 @@ type Config struct { // // In case it isn't explicitly set, it follows the same behavior of the other rings (ex: using the common configuration // section and the ingester configuration by default). - Ring RingCfg `yaml:"ring,omitempty" doc:"description=Defines the ring to be used by the index gateway servers and clients in case the servers are configured to run in 'ring' mode. In case this isn't configured, this block supports inheriting configuration from the common ring section."` + Ring ring.RingConfig `yaml:"ring,omitempty" doc:"description=Defines the ring to be used by the index gateway servers and clients in case the servers are configured to run in 'ring' mode. In case this isn't configured, this block supports inheriting configuration from the common ring section."` } // RegisterFlags register all IndexGatewayClientConfig flags and all the flags of its subconfigs but with a prefix (ex: shipper). func (cfg *Config) RegisterFlags(f *flag.FlagSet) { - cfg.Ring.RegisterFlags("index-gateway.", "collectors/", f) f.StringVar((*string)(&cfg.Mode), "index-gateway.mode", SimpleMode.String(), "Defines in which mode the index gateway server will operate (default to 'simple'). It supports two modes:\n- 'simple': an index gateway server instance is responsible for handling, storing and returning requests for all indices for all tenants.\n- 'ring': an index gateway server instance is responsible for a subset of tenants instead of all tenants.") + + // Ring + skipFlags := []string{ + "index-gateway.ring.num-tokens", + "index-gateway.ring.replication-factor", + } + cfg.Ring.RegisterFlagsWithPrefix("index-gateway.", "collectors/", f, skipFlags...) + f.IntVar(&cfg.Ring.NumTokens, "index-gateway.ring.num-tokens", NumTokens, fmt.Sprintf("IGNORED: Num tokens is fixed to %d", NumTokens)) + // ReplicationFactor defines how many Index Gateway instances are assigned to each tenant. 
+ // + // Whenever the store queries the ring key-value store for the Index Gateway instance responsible for tenant X, + // multiple Index Gateway instances are expected to be returned as Index Gateway might be busy/locked for specific + // reasons (this is assured by the spikey behavior of Index Gateway latencies). + f.IntVar(&cfg.Ring.ReplicationFactor, "replication-factor", ReplicationFactor, "Deprecated: How many index gateway instances are assigned to each tenant. Use -index-gateway.shard-size instead. The shard size is also a per-tenant setting.") +} + +func (cfg *Config) Validate() error { + if cfg.Ring.NumTokens != NumTokens { + return errors.New("Num tokens must not be changed as it will not take effect") + } + return nil } diff --git a/pkg/util/flagext/flagsetskip.go b/pkg/util/flagext/flagsetskip.go new file mode 100644 index 0000000000000..c10e05d6969a6 --- /dev/null +++ b/pkg/util/flagext/flagsetskip.go @@ -0,0 +1,55 @@ +package flagext + +import ( + "flag" + "time" +) + +type FlagSetWithSkip struct { + *flag.FlagSet + skip map[string]struct{} +} + +func NewFlagSetWithSkip(f *flag.FlagSet, skip []string) *FlagSetWithSkip { + skipMap := make(map[string]struct{}, len(skip)) + for _, s := range skip { + skipMap[s] = struct{}{} + } + return &FlagSetWithSkip{f, skipMap} +} + +func (f *FlagSetWithSkip) ToFlagSet() *flag.FlagSet { + return f.FlagSet +} + +func (f *FlagSetWithSkip) DurationVar(p *time.Duration, name string, value time.Duration, usage string) { + if _, ok := f.skip[name]; !ok { + f.FlagSet.DurationVar(p, name, value, usage) + } +} + +func (f *FlagSetWithSkip) StringVar(p *string, name string, value string, usage string) { + if _, ok := f.skip[name]; !ok { + f.FlagSet.StringVar(p, name, value, usage) + } +} + +func (f *FlagSetWithSkip) BoolVar(p *bool, name string, value bool, usage string) { + if _, ok := f.skip[name]; !ok { + f.FlagSet.BoolVar(p, name, value, usage) + } +} + +func (f *FlagSetWithSkip) IntVar(p *int, name string, value int, usage string) { + if _, ok := f.skip[name]; !ok { + f.FlagSet.IntVar(p, name, value, usage) + } +} + +func (f *FlagSetWithSkip) Var(value flag.Value, name string, usage string) { + if _, ok := f.skip[name]; !ok { + f.FlagSet.Var(value, name, usage) + } +} + +// TODO: Add more methods as needed. 
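Editor's note: the FlagSetWithSkip wrapper added above only filters flag registration by name; the pattern used throughout this series is to skip a shared ring flag and then re-register it with a fixed default and an "IGNORED:" description. Below is a minimal usage sketch under stated assumptions — the exampleRingConfig type and the "example.ring.*" flag names are hypothetical and exist only for illustration; the only pieces taken from the patch are NewFlagSetWithSkip and its IntVar method from pkg/util/flagext.

package main

import (
	"flag"
	"fmt"

	"github.com/grafana/loki/pkg/util/flagext"
)

// exampleRingConfig is a hypothetical config struct used only for this sketch.
type exampleRingConfig struct {
	NumTokens         int
	ReplicationFactor int
}

// registerRingFlags mirrors how RingConfig.RegisterFlagsWithPrefix uses the wrapper:
// any flag whose name appears in skip is silently not registered, so the caller
// can re-register it afterwards with a component-specific, fixed default.
func registerRingFlags(cfg *exampleRingConfig, fs *flag.FlagSet, skip []string) {
	f := flagext.NewFlagSetWithSkip(fs, skip)
	f.IntVar(&cfg.NumTokens, "example.ring.num-tokens", 128, "Number of tokens to own in the ring.")
	f.IntVar(&cfg.ReplicationFactor, "example.ring.replication-factor", 3, "Factor for data replication.")
}

func main() {
	var cfg exampleRingConfig
	fs := flag.NewFlagSet("example", flag.ExitOnError)

	// Skip the shared num-tokens flag, then pin it to a fixed value with an
	// "IGNORED:" description, as the compactor and scheduler patches do above.
	registerRingFlags(&cfg, fs, []string{"example.ring.num-tokens"})
	fs.IntVar(&cfg.NumTokens, "example.ring.num-tokens", 1, "IGNORED: Num tokens is fixed to 1")

	_ = fs.Parse([]string{})
	fmt.Println(cfg.NumTokens, cfg.ReplicationFactor) // prints: 1 3
}

Because the skipped name is never registered on the underlying flag.FlagSet, the later fs.IntVar call does not trigger a "flag redefined" panic; this is what lets components in this series pin num-tokens and replication-factor while still reusing the common RingConfig registration.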
diff --git a/pkg/util/ring/ring_config.go b/pkg/util/ring/ring_config.go index eb9945ffcb3ec..779c40f4dad5c 100644 --- a/pkg/util/ring/ring_config.go +++ b/pkg/util/ring/ring_config.go @@ -15,6 +15,7 @@ import ( "github.com/grafana/dskit/netutil" "github.com/grafana/dskit/ring" + util_flagext "github.com/grafana/loki/pkg/util/flagext" util_log "github.com/grafana/loki/pkg/util/log" ) @@ -28,6 +29,8 @@ type RingConfig struct { // nolint:revive HeartbeatTimeout time.Duration `yaml:"heartbeat_timeout"` TokensFilePath string `yaml:"tokens_file_path"` ZoneAwarenessEnabled bool `yaml:"zone_awareness_enabled"` + NumTokens int `yaml:"num_tokens"` + ReplicationFactor int `yaml:"replication_factor"` // Instance details InstanceID string `yaml:"instance_id" doc:"default="` @@ -45,7 +48,9 @@ type RingConfig struct { // nolint:revive // RegisterFlagsWithPrefix adds the flags required to config this to the given FlagSet // storePrefix is used to set the path in the KVStore and should end with a / -func (cfg *RingConfig) RegisterFlagsWithPrefix(flagsPrefix, storePrefix string, f *flag.FlagSet) { +func (cfg *RingConfig) RegisterFlagsWithPrefix(flagsPrefix, storePrefix string, fs *flag.FlagSet, skip ...string) { + f := util_flagext.NewFlagSetWithSkip(fs, skip) + hostname, err := os.Hostname() if err != nil { level.Error(util_log.Logger).Log("msg", "failed to get hostname", "err", err) @@ -53,11 +58,13 @@ func (cfg *RingConfig) RegisterFlagsWithPrefix(flagsPrefix, storePrefix string, } // Ring flags - cfg.KVStore.RegisterFlagsWithPrefix(flagsPrefix+"ring.", storePrefix, f) + cfg.KVStore.RegisterFlagsWithPrefix(flagsPrefix+"ring.", storePrefix, f.ToFlagSet()) f.DurationVar(&cfg.HeartbeatPeriod, flagsPrefix+"ring.heartbeat-period", 15*time.Second, "Period at which to heartbeat to the ring. 0 = disabled.") f.DurationVar(&cfg.HeartbeatTimeout, flagsPrefix+"ring.heartbeat-timeout", time.Minute, "The heartbeat timeout after which compactors are considered unhealthy within the ring. 0 = never (timeout disabled).") f.StringVar(&cfg.TokensFilePath, flagsPrefix+"ring.tokens-file-path", "", "File path where tokens are stored. If empty, tokens are not stored at shutdown and restored at startup.") f.BoolVar(&cfg.ZoneAwarenessEnabled, flagsPrefix+"ring.zone-awareness-enabled", false, "True to enable zone-awareness and replicate blocks across different availability zones.") + f.IntVar(&cfg.NumTokens, flagsPrefix+"ring.num-tokens", 128, "Number of tokens to own in the ring.") + f.IntVar(&cfg.ReplicationFactor, flagsPrefix+"ring.replication-factor", 3, "Factor for data replication.") // Instance flags cfg.InstanceInterfaceNames = netutil.PrivateNetworkInterfacesWithFallback([]string{"eth0", "en0"}, util_log.Logger) @@ -123,18 +130,3 @@ func (cfg *RingConfig) ToRingConfig(replicationFactor int) ring.Config { return rc } - -// RingConfigWithRF is a wrapper for our internally used ring configuration plus the replication factor. -type RingConfigWithRF struct { // nolint:revive - // RingConfig configures the ring. - RingConfig `yaml:",inline"` - - // ReplicationFactor defines how many replicas store a single data shard. - ReplicationFactor int `yaml:"replication_factor"` -} - -// RegisterFlagsWithPrefix registers all Bloom Gateway CLI flags. 
-func (cfg *RingConfigWithRF) RegisterFlagsWithPrefix(prefix, storePrefix string, f *flag.FlagSet) { - cfg.RingConfig.RegisterFlagsWithPrefix(prefix, storePrefix, f) - f.IntVar(&cfg.ReplicationFactor, prefix+"replication-factor", 3, "Factor for data replication.") -} diff --git a/tools/doc-generator/writer.go b/tools/doc-generator/writer.go index a13613c7fbffd..f182d0a8600c5 100644 --- a/tools/doc-generator/writer.go +++ b/tools/doc-generator/writer.go @@ -27,18 +27,20 @@ func (w *specWriter) writeConfigBlock(b *parse.ConfigBlock, indent int) { return } + var written bool for i, entry := range b.Entries { // Add a new line to separate from the previous entry - if i > 0 { + if written && i > 0 { w.out.WriteString("\n") } - w.writeConfigEntry(entry, indent) + written = w.writeConfigEntry(entry, indent) } } // nolint:goconst -func (w *specWriter) writeConfigEntry(e *parse.ConfigEntry, indent int) { +func (w *specWriter) writeConfigEntry(e *parse.ConfigEntry, indent int) (written bool) { + written = true if e.Kind == parse.KindBlock { // If the block is a root block it will have its dedicated section in the doc, // so here we've just to write down the reference without re-iterating on it. @@ -64,6 +66,11 @@ func (w *specWriter) writeConfigEntry(e *parse.ConfigEntry, indent int) { } if e.Kind == parse.KindField || e.Kind == parse.KindSlice || e.Kind == parse.KindMap { + if strings.HasPrefix(e.Description(), "IGNORED:") { + // We skip documenting any field whose description starts with "IGNORED:". + return false + } + // Description w.writeComment(e.Description(), indent, 0) w.writeExample(e.FieldExample, indent) @@ -87,6 +94,8 @@ func (w *specWriter) writeConfigEntry(e *parse.ConfigEntry, indent int) { w.out.WriteString(pad(indent) + "[" + e.Name + ": <" + e.FieldType + ">" + defaultValue + "]\n") } } + + return written } func (w *specWriter) writeFlag(name string, indent int) { From 2544f003cf334c55150827b7be43bed2a7477c75 Mon Sep 17 00:00:00 2001 From: Salva Corts Date: Fri, 15 Mar 2024 11:21:48 +0100 Subject: [PATCH 17/25] chore: Add summary for received filters in bloom gateway (#12158) --- pkg/bloomgateway/bloomgateway.go | 1 + pkg/bloomgateway/metrics.go | 8 +++ pkg/querier/queryrange/metrics.go | 56 +++++++++++++++++++ pkg/querier/queryrange/roundtrip.go | 2 + .../indexshipper/indexgateway/gateway.go | 1 - 5 files changed, 67 insertions(+), 1 deletion(-) diff --git a/pkg/bloomgateway/bloomgateway.go b/pkg/bloomgateway/bloomgateway.go index 515b27b01b2cf..4138ff4c1beb2 100644 --- a/pkg/bloomgateway/bloomgateway.go +++ b/pkg/bloomgateway/bloomgateway.go @@ -218,6 +218,7 @@ func (g *Gateway) FilterChunkRefs(ctx context.Context, req *logproto.FilterChunk } filters := syntax.ExtractLineFilters(req.Plan.AST) + g.metrics.receivedFilters.Observe(float64(len(filters))) // Shortcut if request does not contain filters if len(filters) == 0 { diff --git a/pkg/bloomgateway/metrics.go b/pkg/bloomgateway/metrics.go index 9058a90078ac5..0e428b248d1e3 100644 --- a/pkg/bloomgateway/metrics.go +++ b/pkg/bloomgateway/metrics.go @@ -18,6 +18,7 @@ type serverMetrics struct { filteredSeries prometheus.Histogram requestedChunks prometheus.Histogram filteredChunks prometheus.Histogram + receivedFilters prometheus.Histogram } func newMetrics(registerer prometheus.Registerer, namespace, subsystem string) *metrics { @@ -66,6 +67,13 @@ func newServerMetrics(registerer prometheus.Registerer, namespace, subsystem str Help: "Total amount of chunk refs filtered by bloom-gateway", Buckets: 
prometheus.ExponentialBucketsRange(1, 100e3, 10), }), + receivedFilters: promauto.With(registerer).NewHistogram(prometheus.HistogramOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "request_filters", + Help: "Number of filters per request.", + Buckets: prometheus.ExponentialBuckets(1, 2, 9), // 1 -> 256 + }), } } diff --git a/pkg/querier/queryrange/metrics.go b/pkg/querier/queryrange/metrics.go index 390f2c81d771e..9482becf98817 100644 --- a/pkg/querier/queryrange/metrics.go +++ b/pkg/querier/queryrange/metrics.go @@ -1,9 +1,13 @@ package queryrange import ( + "context" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" "github.com/grafana/loki/pkg/logql" + "github.com/grafana/loki/pkg/logql/syntax" "github.com/grafana/loki/pkg/querier/queryrange/queryrangebase" ) @@ -13,6 +17,7 @@ type Metrics struct { *MiddlewareMapperMetrics *SplitByMetrics *LogResultCacheMetrics + *QueryMetrics *queryrangebase.ResultsCacheMetrics } @@ -35,6 +40,57 @@ func NewMetrics(registerer prometheus.Registerer, metricsNamespace string) *Metr MiddlewareMapperMetrics: NewMiddlewareMapperMetrics(registerer), SplitByMetrics: NewSplitByMetrics(registerer), LogResultCacheMetrics: NewLogResultCacheMetrics(registerer), + QueryMetrics: NewMiddlewareQueryMetrics(registerer, metricsNamespace), ResultsCacheMetrics: queryrangebase.NewResultsCacheMetrics(registerer), } } + +type QueryMetrics struct { + receivedFilters prometheus.Histogram +} + +func NewMiddlewareQueryMetrics(registerer prometheus.Registerer, metricsNamespace string) *QueryMetrics { + return &QueryMetrics{ + receivedFilters: promauto.With(registerer).NewHistogram(prometheus.HistogramOpts{ + Namespace: metricsNamespace, + Name: "query_frontend_query_filters", + Help: "Number of filters per query.", + Buckets: prometheus.ExponentialBuckets(1, 2, 9), // 1 -> 256 + }), + } +} + +// QueryMetricsMiddleware can be inserted into the middleware chain to expose timing information. +func QueryMetricsMiddleware(metrics *QueryMetrics) queryrangebase.Middleware { + return queryrangebase.MiddlewareFunc(func(next queryrangebase.Handler) queryrangebase.Handler { + return queryrangebase.HandlerFunc(func(ctx context.Context, req queryrangebase.Request) (queryrangebase.Response, error) { + var expr syntax.Expr + switch r := req.(type) { + case *LokiRequest: + if r.Plan != nil { + expr = r.Plan.AST + } + case *LokiInstantRequest: + if r.Plan != nil { + expr = r.Plan.AST + } + default: + return next.Do(ctx, req) + } + + // The plan should always be present, but if it's not, we'll parse the query to get the filters. 
+ if expr == nil { + var err error + expr, err = syntax.ParseExpr(req.GetQuery()) + if err != nil { + return nil, err + } + } + + filters := syntax.ExtractLineFilters(expr) + metrics.receivedFilters.Observe(float64(len(filters))) + + return next.Do(ctx, req) + }) + }) +} diff --git a/pkg/querier/queryrange/roundtrip.go b/pkg/querier/queryrange/roundtrip.go index 5532eab989c1e..5184ef62bb13c 100644 --- a/pkg/querier/queryrange/roundtrip.go +++ b/pkg/querier/queryrange/roundtrip.go @@ -426,6 +426,7 @@ func NewLogFilterTripperware(cfg Config, engineOpts logql.EngineOpts, log log.Lo statsHandler := indexStatsTripperware.Wrap(next) queryRangeMiddleware := []base.Middleware{ + QueryMetricsMiddleware(metrics.QueryMetrics), StatsCollectorMiddleware(), NewLimitsMiddleware(limits), NewQuerySizeLimiterMiddleware(schema.Configs, engineOpts, log, limits, statsHandler), @@ -703,6 +704,7 @@ func NewMetricTripperware(cfg Config, engineOpts logql.EngineOpts, log log.Logge statsHandler := indexStatsTripperware.Wrap(next) queryRangeMiddleware := []base.Middleware{ + QueryMetricsMiddleware(metrics.QueryMetrics), StatsCollectorMiddleware(), NewLimitsMiddleware(limits), } diff --git a/pkg/storage/stores/shipper/indexshipper/indexgateway/gateway.go b/pkg/storage/stores/shipper/indexshipper/indexgateway/gateway.go index e5aa0bcc73524..a2325bd5c51bb 100644 --- a/pkg/storage/stores/shipper/indexshipper/indexgateway/gateway.go +++ b/pkg/storage/stores/shipper/indexshipper/indexgateway/gateway.go @@ -229,7 +229,6 @@ func (g *Gateway) GetChunkRef(ctx context.Context, req *logproto.GetChunkRefRequ // Extract LineFiltersExpr from the plan. If there is none, we can short-circuit and return before making a req // to the bloom-gateway (through the g.bloomQuerier) - // TODO(owen-d): metrics for number of filters seen, but probably do elsewhere (in query-frontend?) if len(syntax.ExtractLineFilters(req.Plan.AST)) == 0 { return result, nil } From 60dcee1b34e895f0b552b9302b9ad234a610e553 Mon Sep 17 00:00:00 2001 From: Poyzan <31743851+poyzannur@users.noreply.github.com> Date: Fri, 15 Mar 2024 13:56:41 +0300 Subject: [PATCH 18/25] fix: data race in seriesIterator, use atomic int (#12223) --- pkg/iter/sample_iterator.go | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/pkg/iter/sample_iterator.go b/pkg/iter/sample_iterator.go index 632ed9106df15..ce943a8328c75 100644 --- a/pkg/iter/sample_iterator.go +++ b/pkg/iter/sample_iterator.go @@ -3,6 +3,7 @@ package iter import ( "container/heap" "context" + "go.uber.org/atomic" "io" "sync" @@ -521,7 +522,7 @@ func NewSampleQueryResponseIterator(resp *logproto.SampleQueryResponse) SampleIt } type seriesIterator struct { - i int + i *atomic.Int32 series logproto.Series } @@ -567,14 +568,14 @@ func NewMultiSeriesIterator(series []logproto.Series) SampleIterator { // NewSeriesIterator iterates over sample in a series. 
func NewSeriesIterator(series logproto.Series) SampleIterator { return &seriesIterator{ - i: -1, + i: atomic.NewInt32(-1), series: series, } } func (i *seriesIterator) Next() bool { - i.i++ - return i.i < len(i.series.Samples) + i.i.Inc() + return int(i.i.Load()) < len(i.series.Samples) } func (i *seriesIterator) Error() error { @@ -590,7 +591,7 @@ func (i *seriesIterator) StreamHash() uint64 { } func (i *seriesIterator) Sample() logproto.Sample { - return i.series.Samples[i.i] + return i.series.Samples[i.i.Load()] } func (i *seriesIterator) Close() error { From b9266883a09d06b6735b7a356f1af07b30541fc3 Mon Sep 17 00:00:00 2001 From: Poyzan <31743851+poyzannur@users.noreply.github.com> Date: Fri, 15 Mar 2024 13:56:47 +0300 Subject: [PATCH 19/25] fix: data race in unit test (#12224) --- pkg/compactor/deletion/delete_requests_client_test.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pkg/compactor/deletion/delete_requests_client_test.go b/pkg/compactor/deletion/delete_requests_client_test.go index 44c68e26b9e93..2268914711963 100644 --- a/pkg/compactor/deletion/delete_requests_client_test.go +++ b/pkg/compactor/deletion/delete_requests_client_test.go @@ -90,11 +90,11 @@ func (m *mockCompactorClient) SetDeleteRequests(d []DeleteRequest) { } func (m *mockCompactorClient) GetAllDeleteRequestsForUser(_ context.Context, _ string) ([]DeleteRequest, error) { + m.mx.Lock() + defer m.mx.Unlock() if m.err != nil { return nil, m.err } - m.mx.Lock() - defer m.mx.Unlock() return m.delRequests, nil } @@ -113,5 +113,7 @@ func (m *mockCompactorClient) Name() string { func (m *mockCompactorClient) Stop() {} func (m *mockCompactorClient) SetErr(err error) { + m.mx.Lock() + defer m.mx.Unlock() m.err = err } From 3dd5b706aa93f3ab190ba2033f75c88b005841ef Mon Sep 17 00:00:00 2001 From: Poyzan <31743851+poyzannur@users.noreply.github.com> Date: Fri, 15 Mar 2024 14:37:34 +0300 Subject: [PATCH 20/25] fix: correct use of atomic int (#12225) --- pkg/iter/sample_iterator.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/iter/sample_iterator.go b/pkg/iter/sample_iterator.go index ce943a8328c75..4c17c473e9f0d 100644 --- a/pkg/iter/sample_iterator.go +++ b/pkg/iter/sample_iterator.go @@ -574,8 +574,8 @@ func NewSeriesIterator(series logproto.Series) SampleIterator { } func (i *seriesIterator) Next() bool { - i.i.Inc() - return int(i.i.Load()) < len(i.series.Samples) + tmp := i.i.Add(1) + return int(tmp) < len(i.series.Samples) } func (i *seriesIterator) Error() error { From 0d1a3694f5fd2d29c69a4b80a3fe9ba41a9fb697 Mon Sep 17 00:00:00 2001 From: Paul Rogers <129207811+paul1r@users.noreply.github.com> Date: Fri, 15 Mar 2024 09:24:37 -0400 Subject: [PATCH 21/25] test: Don't start server until all configured (#12226) --- pkg/storage/chunk/client/gcp/gcs_object_client_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/storage/chunk/client/gcp/gcs_object_client_test.go b/pkg/storage/chunk/client/gcp/gcs_object_client_test.go index 560c2e5297ef2..ac3e7a77dcd61 100644 --- a/pkg/storage/chunk/client/gcp/gcs_object_client_test.go +++ b/pkg/storage/chunk/client/gcp/gcs_object_client_test.go @@ -246,7 +246,7 @@ func fakeHTTPRespondingServer(t *testing.T, code int) *httptest.Server { } func fakeSleepingServer(t *testing.T, responseSleep, connectSleep time.Duration, closeOnNew, closeOnActive bool) *httptest.Server { - server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + server := 
httptest.NewUnstartedServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { // sleep on response to mimic server overload time.Sleep(responseSleep) })) @@ -264,6 +264,6 @@ func fakeSleepingServer(t *testing.T, responseSleep, connectSleep time.Duration, } } t.Cleanup(server.Close) - + server.Start() return server } From 4ce5fa895421eab1f078ede0f0ff7996b54e90b9 Mon Sep 17 00:00:00 2001 From: Michel Hollands <42814411+MichelHollands@users.noreply.github.com> Date: Fri, 15 Mar 2024 13:43:41 +0000 Subject: [PATCH 22/25] feat: update logcli so it tries to load the latest version of the schemaconfig (#11852) Signed-off-by: Michel Hollands Signed-off-by: Michel Hollands Co-authored-by: Dylan Guedes --- CHANGELOG.md | 2 + pkg/logcli/query/query.go | 101 +++++++++++++++---------- pkg/logcli/query/query_test.go | 132 +++++++++++++++++++++++++++++++-- 3 files changed, 190 insertions(+), 45 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 512b9b0bdbb67..2c19c74a065a1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -110,6 +110,8 @@ #### LogCLI +* [11852](https://github.com/grafana/loki/pull/11852) **MichelHollands**: feat: update logcli so it tries to load the latest version of the schemaconfig + #### Mixins * [11087](https://github.com/grafana/loki/pull/11087) **JoaoBraveCoding**: Adds structured metadata panels for ingested data diff --git a/pkg/logcli/query/query.go b/pkg/logcli/query/query.go index fc5be5f393cb2..7e8c86e08ad15 100644 --- a/pkg/logcli/query/query.go +++ b/pkg/logcli/query/query.go @@ -2,6 +2,7 @@ package query import ( "context" + stdErrors "errors" "flag" "fmt" "io" @@ -10,7 +11,6 @@ import ( "sync" "time" - "github.com/grafana/dskit/multierror" "github.com/grafana/dskit/user" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" @@ -395,6 +395,41 @@ func maxTime(t1, t2 time.Time) time.Time { return t2 } +func getLatestConfig(client chunk.ObjectClient, orgID string) (*config.SchemaConfig, error) { + // Get the latest + iteration := 0 + searchFor := fmt.Sprintf("%s-%s.yaml", orgID, schemaConfigFilename) // schemaconfig-tenant.yaml + var loadedSchema *config.SchemaConfig + for { + if iteration != 0 { + searchFor = fmt.Sprintf("%s-%s-%d.yaml", orgID, schemaConfigFilename, iteration) // tenant-schemaconfig-1.yaml + } + tempSchema, err := LoadSchemaUsingObjectClient(client, searchFor) + if err == errNotExists { + break + } + if err != nil { + return nil, err + } + + loadedSchema = tempSchema + iteration++ + } + if loadedSchema != nil { + return loadedSchema, nil + } + + searchFor = fmt.Sprintf("%s.yaml", schemaConfigFilename) // schemaconfig.yaml for backwards compatibility + loadedSchema, err := LoadSchemaUsingObjectClient(client, searchFor) + if err == nil { + return loadedSchema, nil + } + if err != errNotExists { + return nil, err + } + return nil, errNotExists +} + // DoLocalQuery executes the query against the local store using a Loki configuration file. func (q *Query) DoLocalQuery(out output.LogOutput, statistics bool, orgID string, useRemoteSchema bool) error { var conf loki.Config @@ -417,15 +452,10 @@ func (q *Query) DoLocalQuery(out output.LogOutput, statistics bool, orgID string return err } - objects := []string{ - fmt.Sprintf("%s-%s.yaml", orgID, schemaConfigFilename), // schemaconfig-tenant.yaml - fmt.Sprintf("%s.yaml", schemaConfigFilename), // schemaconfig.yaml for backwards compatibility - } - loadedSchema, err := LoadSchemaUsingObjectClient(client, objects...) 
+ loadedSchema, err := getLatestConfig(client, orgID) if err != nil { return err } - conf.SchemaConfig = *loadedSchema } @@ -484,10 +514,6 @@ func (q *Query) DoLocalQuery(out output.LogOutput, statistics bool, orgID string query = eng.Query(params) } - if err != nil { - return err - } - // execute the query ctx := user.InjectOrgID(context.Background(), orgID) result, err := query.Exec(ctx) @@ -521,41 +547,40 @@ func GetObjectClient(store string, conf loki.Config, cm storage.ClientMetrics) ( return oc, nil } +var errNotExists = stdErrors.New("doesn't exist") + type schemaConfigSection struct { config.SchemaConfig `yaml:"schema_config"` } -// LoadSchemaUsingObjectClient returns the loaded schema from the first found object -func LoadSchemaUsingObjectClient(oc chunk.ObjectClient, names ...string) (*config.SchemaConfig, error) { - errs := multierror.New() - for _, name := range names { - schema, err := func(name string) (*config.SchemaConfig, error) { - ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(1*time.Minute)) - defer cancel() - rdr, _, err := oc.GetObject(ctx, name) - if err != nil { - return nil, errors.Wrapf(err, "failed to load schema object '%s'", name) - } - defer rdr.Close() +// LoadSchemaUsingObjectClient returns the loaded schema from the object with the given name +func LoadSchemaUsingObjectClient(oc chunk.ObjectClient, name string) (*config.SchemaConfig, error) { + ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(1*time.Minute)) + defer cancel() - decoder := yaml.NewDecoder(rdr) - decoder.SetStrict(true) - section := schemaConfigSection{} - err = decoder.Decode(§ion) - if err != nil { - return nil, err - } + ok, err := oc.ObjectExists(ctx, name) + if !ok { + return nil, errNotExists + } + if err != nil { + return nil, err + } - return §ion.SchemaConfig, nil - }(name) + rdr, _, err := oc.GetObject(ctx, name) + if err != nil { + return nil, errors.Wrapf(err, "failed to load schema object '%s'", name) + } + defer rdr.Close() - if err != nil { - errs = append(errs, err) - continue - } - return schema, nil + decoder := yaml.NewDecoder(rdr) + decoder.SetStrict(true) + section := schemaConfigSection{} + err = decoder.Decode(§ion) + if err != nil { + return nil, err } - return nil, errs.Err() + + return §ion.SchemaConfig, nil } // SetInstant makes the Query an instant type diff --git a/pkg/logcli/query/query_test.go b/pkg/logcli/query/query_test.go index 1b4c18f5265e0..605155bca3db2 100644 --- a/pkg/logcli/query/query_test.go +++ b/pkg/logcli/query/query_test.go @@ -3,6 +3,7 @@ package query import ( "bytes" "context" + "errors" "fmt" "os" "path/filepath" @@ -23,6 +24,7 @@ import ( "github.com/grafana/loki/pkg/logql" "github.com/grafana/loki/pkg/loki" "github.com/grafana/loki/pkg/storage" + "github.com/grafana/loki/pkg/storage/chunk/client" "github.com/grafana/loki/pkg/storage/chunk/client/local" "github.com/grafana/loki/pkg/storage/config" "github.com/grafana/loki/pkg/util/marshal" @@ -406,7 +408,6 @@ func Test_batch(t *testing.T) { type testQueryClient struct { engine *logql.Engine queryRangeCalls int - orgID string } func newTestQueryClient(testStreams ...logproto.Stream) *testQueryClient { @@ -484,6 +485,17 @@ func (t *testQueryClient) GetVolumeRange(_ *volume.Query) (*loghttp.QueryRespons panic("not implemented") } +var legacySchemaConfigContents = `schema_config: + configs: + - from: 2020-05-15 + store: boltdb-shipper + object_store: gcs + schema: v10 + index: + prefix: index_ + period: 168h +` + var schemaConfigContents = 
`schema_config: configs: - from: 2020-05-15 @@ -501,10 +513,35 @@ var schemaConfigContents = `schema_config: prefix: index_ period: 24h ` +var schemaConfigContents2 = `schema_config: + configs: + - from: 2020-05-15 + store: boltdb-shipper + object_store: gcs + schema: v10 + index: + prefix: index_ + period: 168h + - from: 2020-07-31 + store: boltdb-shipper + object_store: gcs + schema: v11 + index: + prefix: index_ + period: 24h + - from: 2020-09-30 + store: boltdb-shipper + object_store: gcs + schema: v12 + index: + prefix: index_ + period: 24h +` +var cm = storage.NewClientMetrics() -func TestLoadFromURL(t *testing.T) { +func setupTestEnv(t *testing.T) (string, client.ObjectClient) { + t.Helper() tmpDir := t.TempDir() - conf := loki.Config{ StorageConfig: storage.Config{ FSConfig: local.FSConfig{ @@ -513,11 +550,19 @@ func TestLoadFromURL(t *testing.T) { }, } - cm := storage.NewClientMetrics() client, err := GetObjectClient(config.StorageTypeFileSystem, conf, cm) require.NoError(t, err) require.NotNil(t, client) + _, err = getLatestConfig(client, "456") + require.Error(t, err) + require.True(t, errors.Is(err, errNotExists)) + + return tmpDir, client +} + +func TestLoadFromURL(t *testing.T) { + tmpDir, client := setupTestEnv(t) filename := "schemaconfig.yaml" // Missing schemaconfig.yaml file should error @@ -537,12 +582,85 @@ func TestLoadFromURL(t *testing.T) { require.NoError(t, err) require.NotNil(t, schemaConfig) +} - // Load multiple schemaconfig files - schemaConfig, err = LoadSchemaUsingObjectClient(client, "foo.yaml", filename, "bar.yaml") +func TestMultipleConfigs(t *testing.T) { + tmpDir, client := setupTestEnv(t) + err := os.WriteFile( + filepath.Join(tmpDir, "456-schemaconfig.yaml"), + []byte(schemaConfigContents), + 0666, + ) require.NoError(t, err) - require.NotNil(t, schemaConfig) + + config, err := getLatestConfig(client, "456") + require.NoError(t, err) + require.NotNil(t, config) + require.Len(t, config.Configs, 2) + + err = os.WriteFile( + filepath.Join(tmpDir, "456-schemaconfig-1.yaml"), + []byte(schemaConfigContents2), + 0666, + ) + require.NoError(t, err) + + config, err = getLatestConfig(client, "456") + require.NoError(t, err) + require.NotNil(t, config) + require.Len(t, config.Configs, 3) +} + +func TestMultipleConfigsIncludingLegacy(t *testing.T) { + tmpDir, client := setupTestEnv(t) + + err := os.WriteFile( + filepath.Join(tmpDir, "schemaconfig.yaml"), + []byte(legacySchemaConfigContents), + 0666, + ) + require.NoError(t, err) + + err = os.WriteFile( + filepath.Join(tmpDir, "456-schemaconfig.yaml"), + []byte(schemaConfigContents), + 0666, + ) + require.NoError(t, err) + + config, err := getLatestConfig(client, "456") + require.NoError(t, err) + require.NotNil(t, config) + require.Len(t, config.Configs, 2) + + err = os.WriteFile( + filepath.Join(tmpDir, "456-schemaconfig-1.yaml"), + []byte(schemaConfigContents2), + 0666, + ) + require.NoError(t, err) + + config, err = getLatestConfig(client, "456") + require.NoError(t, err) + require.NotNil(t, config) + require.Len(t, config.Configs, 3) +} + +func TestLegacyConfigOnly(t *testing.T) { + tmpDir, client := setupTestEnv(t) + + err := os.WriteFile( + filepath.Join(tmpDir, "schemaconfig.yaml"), + []byte(legacySchemaConfigContents), + 0666, + ) + require.NoError(t, err) + + config, err := getLatestConfig(client, "456") + require.NoError(t, err) + require.NotNil(t, config) + require.Len(t, config.Configs, 1) } func TestDurationCeilDiv(t *testing.T) { From 347fd4dfa38cdea13a4dbc0ec6f525f36f7a1364 Mon Sep 17 00:00:00 2001 
From: Sheikh-Abubaker Date: Fri, 15 Mar 2024 19:32:32 +0530 Subject: [PATCH 23/25] feat: area/helm: Modified helm template to use http_listen_port and grpc_listen_port instead of hardcoded value (#11646) Signed-off-by: Sheikh-Abubaker Co-authored-by: Alberto Chiusole Co-authored-by: Michel Hollands <42814411+MichelHollands@users.noreply.github.com> --- production/helm/loki/CHANGELOG.md | 4 ++++ production/helm/loki/Chart.yaml | 2 +- production/helm/loki/README.md | 2 +- production/helm/loki/templates/_helpers.tpl | 14 +++++++------- .../backend/query-scheduler-discovery.yaml | 4 ++-- .../backend/service-backend-headless.yaml | 4 ++-- .../loki/templates/backend/service-backend.yaml | 4 ++-- .../templates/backend/statefulset-backend.yaml | 4 ++-- .../templates/monitoring/_helpers-monitoring.tpl | 4 ++-- .../helm/loki/templates/read/deployment-read.yaml | 6 +++--- .../loki/templates/read/service-read-headless.yaml | 4 ++-- .../helm/loki/templates/read/service-read.yaml | 4 ++-- .../helm/loki/templates/read/statefulset-read.yaml | 4 ++-- .../templates/single-binary/service-headless.yaml | 2 +- .../helm/loki/templates/single-binary/service.yaml | 4 ++-- .../loki/templates/single-binary/statefulset.yaml | 4 ++-- .../table-manager/deployment-table-manager.yaml | 4 ++-- .../table-manager/service-table-manager.yaml | 4 ++-- .../templates/write/service-write-headless.yaml | 4 ++-- .../helm/loki/templates/write/service-write.yaml | 4 ++-- .../loki/templates/write/statefulset-write.yaml | 4 ++-- 21 files changed, 47 insertions(+), 43 deletions(-) diff --git a/production/helm/loki/CHANGELOG.md b/production/helm/loki/CHANGELOG.md index 2e372fe1d1ffd..54293fc955988 100644 --- a/production/helm/loki/CHANGELOG.md +++ b/production/helm/loki/CHANGELOG.md @@ -13,6 +13,10 @@ Entries should include a reference to the pull request that introduced the chang [//]: # ( : do not remove this line. This locator is used by the CI pipeline to automatically create a changelog entry for each new Loki release. Add other chart versions and respective changelog entries bellow this line.) +## 5.44.0 + +- [FEATURE] Modified helm template to use parameters http_listen_port and grpc_listen_port instead of hardcoded values. 
+ ## 5.43.7 - [BUGFIX] allow to configure http_config for ruler diff --git a/production/helm/loki/Chart.yaml b/production/helm/loki/Chart.yaml index 025037f888807..e8eec882ec4fc 100644 --- a/production/helm/loki/Chart.yaml +++ b/production/helm/loki/Chart.yaml @@ -3,7 +3,7 @@ name: loki description: Helm chart for Grafana Loki in simple, scalable mode type: application appVersion: 2.9.4 -version: 5.43.7 +version: 5.44.0 home: https://grafana.github.io/helm-charts sources: - https://github.com/grafana/loki diff --git a/production/helm/loki/README.md b/production/helm/loki/README.md index 46c31bb2410b8..3bbdb2fdb7516 100644 --- a/production/helm/loki/README.md +++ b/production/helm/loki/README.md @@ -1,6 +1,6 @@ # loki -![Version: 5.43.7](https://img.shields.io/badge/Version-5.43.7-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 2.9.4](https://img.shields.io/badge/AppVersion-2.9.4-informational?style=flat-square) +![Version: 5.44.0](https://img.shields.io/badge/Version-5.44.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 2.9.4](https://img.shields.io/badge/AppVersion-2.9.4-informational?style=flat-square) Helm chart for Grafana Loki in simple, scalable mode diff --git a/production/helm/loki/templates/_helpers.tpl b/production/helm/loki/templates/_helpers.tpl index 06884953103fc..ccd5f1725178c 100644 --- a/production/helm/loki/templates/_helpers.tpl +++ b/production/helm/loki/templates/_helpers.tpl @@ -567,10 +567,10 @@ Params: service: name: {{ $serviceName }} port: - number: 3100 + number: {{ .Values.loki.server.http_listen_port }} {{- else }} serviceName: {{ $serviceName }} - servicePort: 3100 + servicePort: {{ .Values.loki.server.http_listen_port }} {{- end -}} {{- end -}} {{- end -}} @@ -613,7 +613,7 @@ Create the service endpoint including port for MinIO. {{- $isSingleBinary := eq (include "loki.deployment.isSingleBinary" .) "true" -}} {{- $url := printf "%s.%s.svc.%s.:%s" (include "loki.gatewayFullname" .) .Release.Namespace .Values.global.clusterDomain (.Values.gateway.service.port | toString) }} {{- if and $isSingleBinary (not .Values.gateway.enabled) }} - {{- $url = printf "%s.%s.svc.%s.:3100" (include "loki.singleBinaryFullname" .) .Release.Namespace .Values.global.clusterDomain }} + {{- $url = printf "%s.%s.svc.%s.:%s" (include "loki.singleBinaryFullname" .) 
.Release.Namespace .Values.global.clusterDomain (.Values.loki.server.http_listen_port | toString) }} {{- end }} {{- printf "%s" $url -}} {{- end -}} @@ -726,9 +726,9 @@ http { {{- $writeHost = include "loki.singleBinaryFullname" .}} {{- end }} - {{- $writeUrl := printf "http://%s.%s.svc.%s:3100" $writeHost .Release.Namespace .Values.global.clusterDomain }} - {{- $readUrl := printf "http://%s.%s.svc.%s:3100" $readHost .Release.Namespace .Values.global.clusterDomain }} - {{- $backendUrl := printf "http://%s.%s.svc.%s:3100" $backendHost .Release.Namespace .Values.global.clusterDomain }} + {{- $writeUrl := printf "http://%s.%s.svc.%s:%s" $writeHost .Release.Namespace .Values.global.clusterDomain (.Values.loki.server.http_listen_port | toString) }} + {{- $readUrl := printf "http://%s.%s.svc.%s:%s" $readHost .Release.Namespace .Values.global.clusterDomain (.Values.loki.server.http_listen_port | toString) }} + {{- $backendUrl := printf "http://%s.%s.svc.%s:%s" $backendHost .Release.Namespace .Values.global.clusterDomain (.Values.loki.server.http_listen_port | toString) }} {{- if .Values.gateway.nginxConfig.customWriteUrl }} {{- $writeUrl = .Values.gateway.nginxConfig.customWriteUrl }} @@ -896,7 +896,7 @@ enableServiceLinks: false {{- $isSimpleScalable := eq (include "loki.deployment.isScalable" .) "true" -}} {{- $schedulerAddress := ""}} {{- if and $isSimpleScalable (not .Values.read.legacyReadTarget ) -}} -{{- $schedulerAddress = printf "query-scheduler-discovery.%s.svc.%s.:9095" .Release.Namespace .Values.global.clusterDomain -}} +{{- $schedulerAddress = printf "query-scheduler-discovery.%s.svc.%s.:%s" .Release.Namespace .Values.global.clusterDomain (.Values.loki.server.grpc_listen_port | toString) -}} {{- end -}} {{- printf "%s" $schedulerAddress }} {{- end }} diff --git a/production/helm/loki/templates/backend/query-scheduler-discovery.yaml b/production/helm/loki/templates/backend/query-scheduler-discovery.yaml index a9dedbb545649..527fa13cfa497 100644 --- a/production/helm/loki/templates/backend/query-scheduler-discovery.yaml +++ b/production/helm/loki/templates/backend/query-scheduler-discovery.yaml @@ -15,11 +15,11 @@ spec: publishNotReadyAddresses: true ports: - name: http-metrics - port: 3100 + port: {{ .Values.loki.server.http_listen_port }} targetPort: http-metrics protocol: TCP - name: grpc - port: 9095 + port: {{ .Values.loki.server.grpc_listen_port }} targetPort: grpc protocol: TCP selector: diff --git a/production/helm/loki/templates/backend/service-backend-headless.yaml b/production/helm/loki/templates/backend/service-backend-headless.yaml index 2ed4a9ae18c6a..0755be66d5fee 100644 --- a/production/helm/loki/templates/backend/service-backend-headless.yaml +++ b/production/helm/loki/templates/backend/service-backend-headless.yaml @@ -28,11 +28,11 @@ spec: clusterIP: None ports: - name: http-metrics - port: 3100 + port: {{ .Values.loki.server.http_listen_port }} targetPort: http-metrics protocol: TCP - name: grpc - port: 9095 + port: {{ .Values.loki.server.grpc_listen_port }} targetPort: grpc protocol: TCP selector: diff --git a/production/helm/loki/templates/backend/service-backend.yaml b/production/helm/loki/templates/backend/service-backend.yaml index 47af89e148cc1..cd1bd3b9b69dc 100644 --- a/production/helm/loki/templates/backend/service-backend.yaml +++ b/production/helm/loki/templates/backend/service-backend.yaml @@ -25,11 +25,11 @@ spec: type: ClusterIP ports: - name: http-metrics - port: 3100 + port: {{ .Values.loki.server.http_listen_port }} targetPort: http-metrics 
protocol: TCP - name: grpc - port: 9095 + port: {{ .Values.loki.server.grpc_listen_port }} targetPort: grpc protocol: TCP selector: diff --git a/production/helm/loki/templates/backend/statefulset-backend.yaml b/production/helm/loki/templates/backend/statefulset-backend.yaml index 1b1be055f6266..97e110ea2d542 100644 --- a/production/helm/loki/templates/backend/statefulset-backend.yaml +++ b/production/helm/loki/templates/backend/statefulset-backend.yaml @@ -161,10 +161,10 @@ spec: {{- end }} ports: - name: http-metrics - containerPort: 3100 + containerPort: {{ .Values.loki.server.http_listen_port }} protocol: TCP - name: grpc - containerPort: 9095 + containerPort: {{ .Values.loki.server.grpc_listen_port }} protocol: TCP - name: http-memberlist containerPort: 7946 diff --git a/production/helm/loki/templates/monitoring/_helpers-monitoring.tpl b/production/helm/loki/templates/monitoring/_helpers-monitoring.tpl index e54f13004d341..5a8e8b2ce4176 100644 --- a/production/helm/loki/templates/monitoring/_helpers-monitoring.tpl +++ b/production/helm/loki/templates/monitoring/_helpers-monitoring.tpl @@ -3,9 +3,9 @@ Client definition for LogsInstance */}} {{- define "loki.logsInstanceClient" -}} {{- $isSingleBinary := eq (include "loki.deployment.isSingleBinary" .) "true" -}} -{{- $url := printf "http://%s.%s.svc.%s:3100/loki/api/v1/push" (include "loki.writeFullname" .) .Release.Namespace .Values.global.clusterDomain }} +{{- $url := printf "http://%s.%s.svc.%s:%s/loki/api/v1/push" (include "loki.writeFullname" .) .Release.Namespace .Values.global.clusterDomain .Values.loki.server.http_listen_port }} {{- if $isSingleBinary }} - {{- $url = printf "http://%s.%s.svc.%s:3100/loki/api/v1/push" (include "loki.singleBinaryFullname" .) .Release.Namespace .Values.global.clusterDomain }} + {{- $url = printf "http://%s.%s.svc.%s:%s/loki/api/v1/push" (include "loki.singleBinaryFullname" .) .Release.Namespace .Values.global.clusterDomain .Values.loki.server.http_listen_port }} {{- else if .Values.gateway.enabled -}} {{- $url = printf "http://%s.%s.svc.%s/loki/api/v1/push" (include "loki.gatewayFullname" .) .Release.Namespace .Values.global.clusterDomain }} {{- end -}} diff --git a/production/helm/loki/templates/read/deployment-read.yaml b/production/helm/loki/templates/read/deployment-read.yaml index a5e7524f2a05f..dbe8f531ed188 100644 --- a/production/helm/loki/templates/read/deployment-read.yaml +++ b/production/helm/loki/templates/read/deployment-read.yaml @@ -71,16 +71,16 @@ spec: - -config.file=/etc/loki/config/config.yaml - -target={{ .Values.read.targetModule }} - -legacy-read-mode=false - - -common.compactor-grpc-address={{ include "loki.backendFullname" . }}.{{ .Release.Namespace }}.svc.{{ .Values.global.clusterDomain }}:9095 + - -common.compactor-grpc-address={{ include "loki.backendFullname" . }}.{{ .Release.Namespace }}.svc.{{ .Values.global.clusterDomain }}:{{ .Values.loki.server.grpc_listen_port }} {{- with .Values.read.extraArgs }} {{- toYaml . 
| nindent 12 }} {{- end }} ports: - name: http-metrics - containerPort: 3100 + containerPort: {{ .Values.loki.server.http_listen_port }} protocol: TCP - name: grpc - containerPort: 9095 + containerPort: {{ .Values.loki.server.grpc_listen_port }} protocol: TCP - name: http-memberlist containerPort: 7946 diff --git a/production/helm/loki/templates/read/service-read-headless.yaml b/production/helm/loki/templates/read/service-read-headless.yaml index e3c440082b4f3..14ba0f62f9f8a 100644 --- a/production/helm/loki/templates/read/service-read-headless.yaml +++ b/production/helm/loki/templates/read/service-read-headless.yaml @@ -28,11 +28,11 @@ spec: clusterIP: None ports: - name: http-metrics - port: 3100 + port: {{ .Values.loki.server.http_listen_port }} targetPort: http-metrics protocol: TCP - name: grpc - port: 9095 + port: {{ .Values.loki.server.grpc_listen_port }} targetPort: grpc protocol: TCP appProtocol: tcp diff --git a/production/helm/loki/templates/read/service-read.yaml b/production/helm/loki/templates/read/service-read.yaml index b7306edc058d6..f4000fda5720e 100644 --- a/production/helm/loki/templates/read/service-read.yaml +++ b/production/helm/loki/templates/read/service-read.yaml @@ -25,11 +25,11 @@ spec: type: ClusterIP ports: - name: http-metrics - port: 3100 + port: {{ .Values.loki.server.http_listen_port }} targetPort: http-metrics protocol: TCP - name: grpc - port: 9095 + port: {{ .Values.loki.server.grpc_listen_port }} targetPort: grpc protocol: TCP selector: diff --git a/production/helm/loki/templates/read/statefulset-read.yaml b/production/helm/loki/templates/read/statefulset-read.yaml index eaea4aaecbc29..e0fd2c102975a 100644 --- a/production/helm/loki/templates/read/statefulset-read.yaml +++ b/production/helm/loki/templates/read/statefulset-read.yaml @@ -86,10 +86,10 @@ spec: {{- end }} ports: - name: http-metrics - containerPort: 3100 + containerPort: {{ .Values.loki.server.http_listen_port }} protocol: TCP - name: grpc - containerPort: 9095 + containerPort: {{ .Values.loki.server.grpc_listen_port }} protocol: TCP - name: http-memberlist containerPort: 7946 diff --git a/production/helm/loki/templates/single-binary/service-headless.yaml b/production/helm/loki/templates/single-binary/service-headless.yaml index 51e111982b95a..7522240afc72a 100644 --- a/production/helm/loki/templates/single-binary/service-headless.yaml +++ b/production/helm/loki/templates/single-binary/service-headless.yaml @@ -27,7 +27,7 @@ spec: clusterIP: None ports: - name: http-metrics - port: 3100 + port: {{ .Values.loki.server.http_listen_port }} targetPort: http-metrics protocol: TCP selector: diff --git a/production/helm/loki/templates/single-binary/service.yaml b/production/helm/loki/templates/single-binary/service.yaml index 1dee4eba8a6f8..352fcadf96ccd 100644 --- a/production/helm/loki/templates/single-binary/service.yaml +++ b/production/helm/loki/templates/single-binary/service.yaml @@ -25,11 +25,11 @@ spec: type: ClusterIP ports: - name: http-metrics - port: 3100 + port: {{ .Values.loki.server.http_listen_port }} targetPort: http-metrics protocol: TCP - name: grpc - port: 9095 + port: {{ .Values.loki.server.grpc_listen_port }} targetPort: grpc protocol: TCP selector: diff --git a/production/helm/loki/templates/single-binary/statefulset.yaml b/production/helm/loki/templates/single-binary/statefulset.yaml index 96c8974b72c26..8922c89ab33a2 100644 --- a/production/helm/loki/templates/single-binary/statefulset.yaml +++ b/production/helm/loki/templates/single-binary/statefulset.yaml @@ -90,10 
+90,10 @@ spec: {{- end }} ports: - name: http-metrics - containerPort: 3100 + containerPort: {{ .Values.loki.server.http_listen_port }} protocol: TCP - name: grpc - containerPort: 9095 + containerPort: {{ .Values.loki.server.grpc_listen_port }} protocol: TCP - name: http-memberlist containerPort: 7946 diff --git a/production/helm/loki/templates/table-manager/deployment-table-manager.yaml b/production/helm/loki/templates/table-manager/deployment-table-manager.yaml index bc14d1f6df491..aeb5b1affea57 100644 --- a/production/helm/loki/templates/table-manager/deployment-table-manager.yaml +++ b/production/helm/loki/templates/table-manager/deployment-table-manager.yaml @@ -58,10 +58,10 @@ spec: {{- end }} ports: - name: http-metrics - containerPort: 3100 + containerPort: {{ .Values.loki.server.http_listen_port }} protocol: TCP - name: grpc - containerPort: 9095 + containerPort: {{ .Values.loki.server.grpc_listen_port }} protocol: TCP {{- with .Values.tableManager.extraEnv }} env: diff --git a/production/helm/loki/templates/table-manager/service-table-manager.yaml b/production/helm/loki/templates/table-manager/service-table-manager.yaml index 43443fb57550d..214cd3663e02e 100644 --- a/production/helm/loki/templates/table-manager/service-table-manager.yaml +++ b/production/helm/loki/templates/table-manager/service-table-manager.yaml @@ -23,11 +23,11 @@ spec: type: ClusterIP ports: - name: http-metrics - port: 3100 + port: {{ .Values.loki.server.http_listen_port }} targetPort: http-metrics protocol: TCP - name: grpc - port: 9095 + port: {{ .Values.loki.server.grpc_listen_port }} targetPort: grpc protocol: TCP selector: diff --git a/production/helm/loki/templates/write/service-write-headless.yaml b/production/helm/loki/templates/write/service-write-headless.yaml index 71cdcb7b2db8c..84cf5d7b178f3 100644 --- a/production/helm/loki/templates/write/service-write-headless.yaml +++ b/production/helm/loki/templates/write/service-write-headless.yaml @@ -28,11 +28,11 @@ spec: clusterIP: None ports: - name: http-metrics - port: 3100 + port: {{ .Values.loki.server.http_listen_port }} targetPort: http-metrics protocol: TCP - name: grpc - port: 9095 + port: {{ .Values.loki.server.grpc_listen_port }} targetPort: grpc protocol: TCP appProtocol: tcp diff --git a/production/helm/loki/templates/write/service-write.yaml b/production/helm/loki/templates/write/service-write.yaml index 35a548c0330c3..9603706e6f1eb 100644 --- a/production/helm/loki/templates/write/service-write.yaml +++ b/production/helm/loki/templates/write/service-write.yaml @@ -25,11 +25,11 @@ spec: type: ClusterIP ports: - name: http-metrics - port: 3100 + port: {{ .Values.loki.server.http_listen_port }} targetPort: http-metrics protocol: TCP - name: grpc - port: 9095 + port: {{ .Values.loki.server.grpc_listen_port }} targetPort: grpc protocol: TCP selector: diff --git a/production/helm/loki/templates/write/statefulset-write.yaml b/production/helm/loki/templates/write/statefulset-write.yaml index ca67038a16192..5aa1e78eaf58c 100644 --- a/production/helm/loki/templates/write/statefulset-write.yaml +++ b/production/helm/loki/templates/write/statefulset-write.yaml @@ -92,10 +92,10 @@ spec: {{- end }} ports: - name: http-metrics - containerPort: 3100 + containerPort: {{ .Values.loki.server.http_listen_port }} protocol: TCP - name: grpc - containerPort: 9095 + containerPort: {{ .Values.loki.server.grpc_listen_port }} protocol: TCP - name: http-memberlist containerPort: 7946 From e65e75a3348027a36f0b48f413e8f16f0eb06737 Mon Sep 17 00:00:00 2001 From: Lars 
Toenning Date: Fri, 15 Mar 2024 15:29:35 +0100 Subject: [PATCH 24/25] docs: Fix typo of docker driver configuration option (#12131) Co-authored-by: Michel Hollands <42814411+MichelHollands@users.noreply.github.com> Co-authored-by: J Stickler --- docs/sources/send-data/docker-driver/_index.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/sources/send-data/docker-driver/_index.md b/docs/sources/send-data/docker-driver/_index.md index 6a1b7c833ad51..f262829d58fd7 100644 --- a/docs/sources/send-data/docker-driver/_index.md +++ b/docs/sources/send-data/docker-driver/_index.md @@ -75,6 +75,6 @@ docker plugin rm loki The driver keeps all logs in memory and will drop log entries if Loki is not reachable and if the quantity of `max_retries` has been exceeded. To avoid the dropping of log entries, setting `max_retries` to zero allows unlimited retries; the driver will continue trying forever until Loki is again reachable. Trying forever may have undesired consequences, because the Docker daemon will wait for the Loki driver to process all logs of a container, until the container is removed. Thus, the Docker daemon might wait forever if the container is stuck. -The wait time can be lowered by setting `loki-retries=2`, `loki-max-backoff_800ms`, `loki-timeout=1s` and `keep-file=true`. This way the daemon will be locked only for a short time and the logs will be persisted locally when the Loki client is unable to re-connect. +The wait time can be lowered by setting `loki-retries=2`, `loki-max-backoff=800ms`, `loki-timeout=1s` and `keep-file=true`. This way the daemon will be locked only for a short time and the logs will be persisted locally when the Loki client is unable to re-connect. -To avoid this issue, use the Promtail [Docker target]({{< relref "../../send-data/promtail/configuration#docker" >}}) or [Docker service discovery]({{< relref "../../send-data/promtail/configuration#docker_sd_config" >}}). +To avoid this issue, use the Promtail [Docker target]({{< relref "../../send-data/promtail/configuration#docker" >}}) or [Docker service discovery]({{< relref "../../send-data/promtail/configuration#docker_sd_configs" >}}). From 0a90f0593dfe44330a74ab116df23dbce852bb10 Mon Sep 17 00:00:00 2001 From: ND Tai <49815011+taind772@users.noreply.github.com> Date: Fri, 15 Mar 2024 21:45:34 +0700 Subject: [PATCH 25/25] fix(helm): add protocol and port for default compactorAddress (#12191) Co-authored-by: Michel Hollands <42814411+MichelHollands@users.noreply.github.com> --- production/helm/loki/CHANGELOG.md | 4 ++++ production/helm/loki/Chart.yaml | 2 +- production/helm/loki/README.md | 2 +- production/helm/loki/templates/_helpers.tpl | 2 +- 4 files changed, 7 insertions(+), 3 deletions(-) diff --git a/production/helm/loki/CHANGELOG.md b/production/helm/loki/CHANGELOG.md index 54293fc955988..d6259a7fd60ad 100644 --- a/production/helm/loki/CHANGELOG.md +++ b/production/helm/loki/CHANGELOG.md @@ -13,6 +13,10 @@ Entries should include a reference to the pull request that introduced the chang [//]: # ( : do not remove this line. This locator is used by the CI pipeline to automatically create a changelog entry for each new Loki release. Add other chart versions and respective changelog entries bellow this line.) +## 5.44.1 + +- [BUGFIX] Fix `compactorAddress` field: add protocol and port. + ## 5.44.0 - [FEATURE] Modified helm template to use parameters http_listen_port and grpc_listen_port instead of hardcoded values. 
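A minimal values.yaml sketch of the 5.44.0 change noted above: the chart templates now read both listen ports from `.Values.loki.server`, so an override could look like the following; the ports shown simply restate the previously hardcoded defaults.

```yaml
# Sketch only: override the ports that the chart templates now take from
# .Values.loki.server instead of hardcoding 3100/9095.
loki:
  server:
    http_listen_port: 3100   # rendered into the http-metrics containerPort/port
    grpc_listen_port: 9095   # rendered into the grpc containerPort/port
```
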
diff --git a/production/helm/loki/Chart.yaml b/production/helm/loki/Chart.yaml index e8eec882ec4fc..14a7d07e4559d 100644 --- a/production/helm/loki/Chart.yaml +++ b/production/helm/loki/Chart.yaml @@ -3,7 +3,7 @@ name: loki description: Helm chart for Grafana Loki in simple, scalable mode type: application appVersion: 2.9.4 -version: 5.44.0 +version: 5.44.1 home: https://grafana.github.io/helm-charts sources: - https://github.com/grafana/loki diff --git a/production/helm/loki/README.md b/production/helm/loki/README.md index 3bbdb2fdb7516..bf3fa55894843 100644 --- a/production/helm/loki/README.md +++ b/production/helm/loki/README.md @@ -1,6 +1,6 @@ # loki -![Version: 5.44.0](https://img.shields.io/badge/Version-5.44.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 2.9.4](https://img.shields.io/badge/AppVersion-2.9.4-informational?style=flat-square) +![Version: 5.44.1](https://img.shields.io/badge/Version-5.44.1-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 2.9.4](https://img.shields.io/badge/AppVersion-2.9.4-informational?style=flat-square) Helm chart for Grafana Loki in simple, scalable mode diff --git a/production/helm/loki/templates/_helpers.tpl b/production/helm/loki/templates/_helpers.tpl index ccd5f1725178c..1e858fba62a05 100644 --- a/production/helm/loki/templates/_helpers.tpl +++ b/production/helm/loki/templates/_helpers.tpl @@ -888,7 +888,7 @@ enableServiceLinks: false {{/* single binary */}} {{- $compactorAddress = include "loki.singleBinaryFullname" . -}} {{- end -}} -{{- printf "%s" $compactorAddress }} +{{- printf "http://%s:3100" $compactorAddress }} {{- end }} {{/* Determine query-scheduler address */}}
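A minimal sketch of what the patched `loki.compactorAddress` helper yields once rendered into the Loki configuration; the service name `loki-backend` and the `default` namespace are assumptions chosen purely for illustration, while the scheme and port match the `printf "http://%s:3100"` above.

```yaml
# Illustrative rendering only: the helper now emits a full HTTP URL, so the
# common config block carries scheme and port instead of a bare hostname.
common:
  compactor_address: http://loki-backend.default.svc.cluster.local:3100
```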