From e8c770c4152d2e4b5fd90e107d2b58fbd91175fb Mon Sep 17 00:00:00 2001 From: ionutboangiu Date: Fri, 18 Jul 2025 22:36:55 +0300 Subject: [PATCH] prometheus: add node_id label to cache metrics --- agents/prometheus.go | 17 +++++++++-------- apis/cache.go | 9 ++++----- apis/cache_it_test.go | 20 +++++++++----------- apis/cache_test.go | 10 +++++----- console/cache_stats.go | 7 +++---- docs/prometheus.rst | 17 +++++++++-------- engine/caches.go | 16 ++++++++++++++++ general_tests/prometheus_it_test.go | 9 +++++++-- general_tests/tut_smgeneric_it_test.go | 7 +++---- utils/consts.go | 3 +-- 10 files changed, 66 insertions(+), 49 deletions(-) diff --git a/agents/prometheus.go b/agents/prometheus.go index 8718598ee..29e4844ac 100644 --- a/agents/prometheus.go +++ b/agents/prometheus.go @@ -27,7 +27,6 @@ import ( "github.com/cgrates/cgrates/cores" "github.com/cgrates/cgrates/engine" "github.com/cgrates/cgrates/utils" - "github.com/cgrates/ltcache" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/collectors" "github.com/prometheus/client_golang/prometheus/promhttp" @@ -107,7 +106,7 @@ func NewPrometheusAgent(cfg *config.CGRConfig, cm *engine.ConnManager) *Promethe Subsystem: "cache", Name: "groups_total", Help: "Total number of cache groups", - }, []string{"cache"}) + }, []string{"cache", "node_id"}) reg.MustRegister(cacheGroupsMetric) cacheItemsMetric := prometheus.NewGaugeVec( @@ -116,7 +115,7 @@ func NewPrometheusAgent(cfg *config.CGRConfig, cm *engine.ConnManager) *Promethe Subsystem: "cache", Name: "items_total", Help: "Total number of cache items", - }, []string{"cache"}) + }, []string{"cache", "node_id"}) reg.MustRegister(cacheItemsMetric) statMetrics := prometheus.NewGaugeVec( prometheus.GaugeOpts{ @@ -197,24 +196,26 @@ func (pa *PrometheusAgent) updateCacheStats() { return } for _, connID := range pa.cfg.PrometheusAgentCfg().CacheSConns { - var cacheStats map[string]*ltcache.CacheStats + var reply engine.CacheStatsWithMetadata if err := pa.cm.Call(context.Background(), []string{connID}, - utils.CacheSv1GetCacheStats, + utils.CacheSv1GetStats, &utils.AttrCacheIDsWithAPIOpts{ CacheIDs: pa.cfg.PrometheusAgentCfg().CacheIDs, - }, &cacheStats); err != nil { + }, &reply); err != nil { utils.Logger.Err(fmt.Sprintf( "<%s> failed to retrieve cache stats (connID=%q): %v", utils.PrometheusAgent, connID, err)) continue } + cacheStats := reply.CacheStatistics + nodeID := utils.IfaceAsString(reply.Metadata[utils.NodeID]) for cacheID, stats := range cacheStats { if stats == nil { continue } - pa.cacheGroupsMetric.WithLabelValues(cacheID).Set(float64(stats.Groups)) - pa.cacheItemsMetric.WithLabelValues(cacheID).Set(float64(stats.Items)) + pa.cacheGroupsMetric.WithLabelValues(cacheID, nodeID).Set(float64(stats.Groups)) + pa.cacheItemsMetric.WithLabelValues(cacheID, nodeID).Set(float64(stats.Items)) } } } diff --git a/apis/cache.go b/apis/cache.go index 26872a7f2..23cef7ed9 100644 --- a/apis/cache.go +++ b/apis/cache.go @@ -25,7 +25,6 @@ import ( "github.com/cgrates/cgrates/engine" "github.com/cgrates/cgrates/utils" - "github.com/cgrates/ltcache" ) type CacheSv1 struct { @@ -85,10 +84,10 @@ func (chSv1 *CacheSv1) Clear(ctx *context.Context, args *utils.AttrCacheIDsWithA return chSv1.cacheS.V1Clear(ctx, args, reply) } -// GetCacheStats returns CacheStats filtered by cacheIDs -func (chSv1 *CacheSv1) GetCacheStats(ctx *context.Context, args *utils.AttrCacheIDsWithAPIOpts, // - rply *map[string]*ltcache.CacheStats) error { - return chSv1.cacheS.V1GetCacheStats(ctx, args, rply) +// GetStats returns CacheStats filtered by cacheIDs +func (chSv1 *CacheSv1) GetStats(ctx *context.Context, args *utils.AttrCacheIDsWithAPIOpts, + rply *engine.CacheStatsWithMetadata) error { + return chSv1.cacheS.V1GetStats(ctx, args, rply) } // PrecacheStatus checks status of active precache processes diff --git a/apis/cache_it_test.go b/apis/cache_it_test.go index 5704e389a..076f9919a 100644 --- a/apis/cache_it_test.go +++ b/apis/cache_it_test.go @@ -26,8 +26,6 @@ import ( "testing" "time" - "github.com/cgrates/ltcache" - "github.com/cgrates/birpc/context" "github.com/cgrates/cgrates/engine" @@ -342,16 +340,16 @@ func testCacheSLoadCache(t *testing.T) { t.Errorf("Unexpected reply returned") } - var rcvStats map[string]*ltcache.CacheStats + var rcvStats engine.CacheStatsWithMetadata expstats := engine.GetDefaultEmptyCacheStats() expstats[utils.CacheAttributeProfiles].Items = 1 expstats[utils.CacheAttributeFilterIndexes].Groups = 1 expstats[utils.CacheAttributeFilterIndexes].Items = 1 expstats[utils.CacheLoadIDs].Items = 28 - if err := chcRPC.Call(context.Background(), utils.CacheSv1GetCacheStats, + if err := chcRPC.Call(context.Background(), utils.CacheSv1GetStats, new(utils.AttrCacheIDsWithAPIOpts), &rcvStats); err != nil { t.Error(err) - } else if !reflect.DeepEqual(rcvStats, expstats) { + } else if !reflect.DeepEqual(rcvStats.CacheStatistics, expstats) { t.Errorf("Expected %+v \n, received %+v", utils.ToJSON(expstats), utils.ToJSON(rcvStats)) } } @@ -518,7 +516,7 @@ func testCacheGetStatusMoreIDs(t *testing.T) { t.Errorf("Unexpected reply returned") } - var rcvStats map[string]*ltcache.CacheStats + var rcvStats engine.CacheStatsWithMetadata expstats := engine.GetDefaultEmptyCacheStats() expstats[utils.CacheAttributeProfiles].Items = 4 expstats[utils.CacheAttributeFilterIndexes].Groups = 1 @@ -531,10 +529,10 @@ func testCacheGetStatusMoreIDs(t *testing.T) { expstats[utils.CacheFilters].Items = 6 expstats[utils.CacheRPCConnections].Items = 1 expstats[utils.CacheLoadIDs].Items = 28 - if err := chcRPC.Call(context.Background(), utils.CacheSv1GetCacheStats, + if err := chcRPC.Call(context.Background(), utils.CacheSv1GetStats, new(utils.AttrCacheIDsWithAPIOpts), &rcvStats); err != nil { t.Error(err) - } else if !reflect.DeepEqual(rcvStats, expstats) { + } else if !reflect.DeepEqual(rcvStats.CacheStatistics, expstats) { t.Errorf("Expected %+v \n, received %+v", utils.ToJSON(expstats), utils.ToJSON(rcvStats)) } } @@ -549,12 +547,12 @@ func testCacheSClearCache(t *testing.T) { } //all cache cleared, empty items in cache - var rcvStats map[string]*ltcache.CacheStats + var rcvStats engine.CacheStatsWithMetadata expStats := engine.GetDefaultEmptyCacheStats() - if err := chcRPC.Call(context.Background(), utils.CacheSv1GetCacheStats, + if err := chcRPC.Call(context.Background(), utils.CacheSv1GetStats, new(utils.AttrCacheIDsWithAPIOpts), &rcvStats); err != nil { t.Error(err) - } else if !reflect.DeepEqual(rcvStats, expStats) { + } else if !reflect.DeepEqual(rcvStats.CacheStatistics, expStats) { t.Errorf("Expected %+v \n, received %+v", utils.ToJSON(expStats), utils.ToJSON(rcvStats)) } } diff --git a/apis/cache_test.go b/apis/cache_test.go index 0657ea3dc..eab403c13 100644 --- a/apis/cache_test.go +++ b/apis/cache_test.go @@ -27,7 +27,6 @@ import ( "github.com/cgrates/cgrates/config" "github.com/cgrates/cgrates/engine" "github.com/cgrates/cgrates/utils" - "github.com/cgrates/ltcache" ) func TestCacheHasItemAndGetItem(t *testing.T) { @@ -293,18 +292,19 @@ func TestGetCacheStats(t *testing.T) { ch := engine.NewCacheS(cfg, dm, connMgr, nil) cache := NewCacheSv1(ch) ch.SetWithoutReplicate(utils.CacheAttributeProfiles, "cgrates.org:TestGetCacheStats", nil, nil, true, utils.NonTransactional) - var reply map[string]*ltcache.CacheStats + var reply engine.CacheStatsWithMetadata args := &utils.AttrCacheIDsWithAPIOpts{ Tenant: "cgrates.org", APIOpts: map[string]any{}, CacheIDs: []string{utils.CacheAttributeProfiles}, } - if err := cache.GetCacheStats(context.Background(), args, &reply); err != nil { + if err := cache.GetStats(context.Background(), args, &reply); err != nil { t.Error(err) } - if reply[utils.CacheAttributeProfiles].Items != 1 { - t.Errorf("Expected 1\n but received %v", reply[utils.CacheAttributeProfiles].Items) + cacheStats := reply.CacheStatistics + if cacheStats[utils.CacheAttributeProfiles].Items != 1 { + t.Errorf("Expected 1\n but received %v", cacheStats[utils.CacheAttributeProfiles].Items) } } diff --git a/console/cache_stats.go b/console/cache_stats.go index 9884dfa09..b8e2fbc57 100644 --- a/console/cache_stats.go +++ b/console/cache_stats.go @@ -19,14 +19,14 @@ along with this program. If not, see package console import ( + "github.com/cgrates/cgrates/engine" "github.com/cgrates/cgrates/utils" - "github.com/cgrates/ltcache" ) func init() { c := &CmdGetCacheStats{ name: "cache_stats", - rpcMethod: utils.CacheSv1GetCacheStats, + rpcMethod: utils.CacheSv1GetStats, rpcParams: &utils.AttrCacheIDsWithAPIOpts{}, } commands[c.Name()] = c @@ -61,6 +61,5 @@ func (self *CmdGetCacheStats) PostprocessRpcParams() error { } func (self *CmdGetCacheStats) RpcResult() any { - reply := make(map[string]*ltcache.CacheStats) - return &reply + return &engine.CacheStatsWithMetadata{} } diff --git a/docs/prometheus.rst b/docs/prometheus.rst index 01ea4ce5d..6fc739360 100644 --- a/docs/prometheus.rst +++ b/docs/prometheus.rst @@ -7,7 +7,7 @@ PrometheusAgent 1. **Core metrics** - collected from configured CGRateS engines via CoreSv1.Status API 2. **StatQueue metrics** - values from CGRateS :ref:`StatS ` component, collected via StatSv1.GetQueueFloatMetrics API -3. **Cache statistics** - collected from configured :ref:`CacheS ` components via CacheSv1.GetCacheStats API +3. **Cache statistics** - collected from configured :ref:`CacheS ` components via CacheSv1.GetStats API For core metrics, the agent computes real-time values on each Prometheus scrape request. For StatQueue metrics, it retrieves the current state of the stored StatQueues without additional calculations. For cache statistics, it collects current cache utilization data from the configured cache partitions. @@ -103,9 +103,10 @@ The PrometheusAgent exposes the following metrics: go_memstats_alloc_bytes{node_id="e94160b"} 1.1360808e+07 3. **Cache Metrics** (when caches_conns is configured) - - Two separate metrics for cache statistics: ``cgrates_cache_groups_total`` and ``cgrates_cache_items_total`` with cache partition ID label + - Two separate metrics for cache statistics: ``cgrates_cache_groups_total`` and ``cgrates_cache_items_total`` with cache partition ID and node_id labels - Obtained from CacheS services on each scrape request - Useful for identifying memory usage patterns and potential performance issues + - Includes node_id labels for multi-engine environments, allowing collection from multiple CGRateS engines Example of cache metrics output: @@ -113,15 +114,15 @@ The PrometheusAgent exposes the following metrics: # HELP cgrates_cache_groups_total Total number of cache groups # TYPE cgrates_cache_groups_total gauge - cgrates_cache_groups_total{cache="*attribute_filter_indexes"} 2 - cgrates_cache_groups_total{cache="*charger_profiles"} 0 - cgrates_cache_groups_total{cache="*rpc_connections"} 0 + cgrates_cache_groups_total{cache="*attribute_filter_indexes",node_id="dc2cb63"} 2 + cgrates_cache_groups_total{cache="*charger_profiles",node_id="dc2cb63"} 0 + cgrates_cache_groups_total{cache="*rpc_connections",node_id="dc2cb63"} 0 # HELP cgrates_cache_items_total Total number of cache items # TYPE cgrates_cache_items_total gauge - cgrates_cache_items_total{cache="*attribute_filter_indexes"} 6 - cgrates_cache_items_total{cache="*charger_profiles"} 2 - cgrates_cache_items_total{cache="*rpc_connections"} 1 + cgrates_cache_items_total{cache="*attribute_filter_indexes",node_id="dc2cb63"} 6 + cgrates_cache_items_total{cache="*charger_profiles",node_id="dc2cb63"} 2 + cgrates_cache_items_total{cache="*rpc_connections",node_id="dc2cb63"} 1 How It Works diff --git a/engine/caches.go b/engine/caches.go index 6f7b72788..681c6f335 100644 --- a/engine/caches.go +++ b/engine/caches.go @@ -396,6 +396,22 @@ func (chS *CacheS) V1GetCacheStats(ctx *context.Context, args *utils.AttrCacheID return } +type CacheStatsWithMetadata struct { + CacheStatistics map[string]*ltcache.CacheStats + Metadata map[string]any +} + +func (chS *CacheS) V1GetStats(ctx *context.Context, args *utils.AttrCacheIDsWithAPIOpts, + rply *CacheStatsWithMetadata) error { + *rply = CacheStatsWithMetadata{ + CacheStatistics: chS.tCache.GetCacheStats(args.CacheIDs), + Metadata: map[string]any{ + utils.NodeID: chS.cfg.GeneralCfg().NodeID, + }, + } + return nil +} + func (chS *CacheS) V1PrecacheStatus(_ *context.Context, args *utils.AttrCacheIDsWithAPIOpts, rply *map[string]string) (err error) { if len(args.CacheIDs) == 0 { args.CacheIDs = utils.CachePartitions.AsSlice() diff --git a/general_tests/prometheus_it_test.go b/general_tests/prometheus_it_test.go index d6013c4ea..aefac99a3 100644 --- a/general_tests/prometheus_it_test.go +++ b/general_tests/prometheus_it_test.go @@ -58,8 +58,13 @@ func TestPrometheusAgentIT(t *testing.T) { "prometheus_agent": { "enabled": true, "path": "/metrics", - "collect_go_metrics": false, - "collect_process_metrics": false, + "caches_conns": ["*localhost", "external"], + "cache_ids": [ + "*statqueue_profiles", + "*statqueues", + "*stat_filter_indexes", + "*rpc_connections" + ], "stats_conns": ["*localhost", "external"], "stat_queue_ids": ["cgrates.org:SQ_1","SQ_2"] } diff --git a/general_tests/tut_smgeneric_it_test.go b/general_tests/tut_smgeneric_it_test.go index dd0921b02..10063092b 100644 --- a/general_tests/tut_smgeneric_it_test.go +++ b/general_tests/tut_smgeneric_it_test.go @@ -32,7 +32,6 @@ import ( "github.com/cgrates/cgrates/engine" "github.com/cgrates/cgrates/loaders" "github.com/cgrates/cgrates/utils" - "github.com/cgrates/ltcache" ) var ( @@ -134,7 +133,7 @@ func testTutSMGCacheStats(t *testing.T) { // Actions: 9, ActionPlans: 4, AccountActionPlans: 5, SharedGroups: 1, ResourceProfiles: 3, // Resources: 3, StatQueues: 1, StatQueueProfiles: 1, Thresholds: 7, ThresholdProfiles: 7, Filters: 15, // SupplierProfiles: 3, AttributeProfiles: 2} - var rcvStats map[string]*ltcache.CacheStats + var rcvStats engine.CacheStatsWithMetadata expectedStats := engine.GetDefaultEmptyCacheStats() expectedStats[utils.CacheAccountsFilterIndexes].Items = 1 expectedStats[utils.CacheAccountsFilterIndexes].Groups = 1 @@ -184,9 +183,9 @@ func testTutSMGCacheStats(t *testing.T) { expectedStats[utils.CacheAttributeFilterIndexes].Groups = 1 expectedStats[utils.CacheReverseFilterIndexes].Items = 19 expectedStats[utils.CacheReverseFilterIndexes].Groups = 16 - if err := tutSMGRpc.Call(context.Background(), utils.CacheSv1GetCacheStats, new(utils.AttrCacheIDsWithAPIOpts), &rcvStats); err != nil { + if err := tutSMGRpc.Call(context.Background(), utils.CacheSv1GetStats, new(utils.AttrCacheIDsWithAPIOpts), &rcvStats); err != nil { t.Error("Got error on CacheSv1.GetCacheStats: ", err.Error()) - } else if !reflect.DeepEqual(expectedStats, rcvStats) { + } else if !reflect.DeepEqual(expectedStats, rcvStats.CacheStatistics) { t.Errorf("expected: %+v,\n received: %+v", utils.ToJSON(expectedStats), utils.ToJSON(rcvStats)) } } diff --git a/utils/consts.go b/utils/consts.go index 54ead776b..4e99e913b 100644 --- a/utils/consts.go +++ b/utils/consts.go @@ -1438,7 +1438,6 @@ const ( // APIerSv1GetDataCost = "APIerSv1.GetDataCost" // APIerSv1ReplayFailedPosts = "APIerSv1.ReplayFailedPosts" - // APIerSv1GetCacheStats = "APIerSv1.GetCacheStats" // APIerSv1ReloadCache = "APIerSv1.ReloadCache" // APIerSv1RemoveActions = "APIerSv1.RemoveActions" // APIerSv1GetLoadHistory = "APIerSv1.GetLoadHistory" @@ -1764,7 +1763,7 @@ const ( // CacheS APIs const ( CacheSv1 = "CacheSv1" - CacheSv1GetCacheStats = "CacheSv1.GetCacheStats" + CacheSv1GetStats = "CacheSv1.GetStats" CacheSv1GetItemIDs = "CacheSv1.GetItemIDs" CacheSv1HasItem = "CacheSv1.HasItem" CacheSv1GetItem = "CacheSv1.GetItem"