mirror of
https://github.com/cgrates/cgrates.git
synced 2026-02-11 10:06:24 +05:00
prometheus: add node_id label to cache metrics
This commit is contained in:
committed by
Dan Christian Bogos
parent
bdd854d363
commit
8466a65d63
@@ -27,7 +27,6 @@ import (
|
||||
"github.com/cgrates/cgrates/cores"
|
||||
"github.com/cgrates/cgrates/engine"
|
||||
"github.com/cgrates/cgrates/utils"
|
||||
"github.com/cgrates/ltcache"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/client_golang/prometheus/collectors"
|
||||
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||
@@ -165,7 +164,7 @@ func NewPrometheusAgent(cfg *config.CGRConfig, cm *engine.ConnManager) *Promethe
|
||||
Subsystem: "cache",
|
||||
Name: "groups_total",
|
||||
Help: "Total number of cache groups",
|
||||
}, []string{"cache"})
|
||||
}, []string{"cache", "node_id"})
|
||||
reg.MustRegister(cacheGroupsMetric)
|
||||
|
||||
cacheItemsMetric := prometheus.NewGaugeVec(
|
||||
@@ -174,7 +173,7 @@ func NewPrometheusAgent(cfg *config.CGRConfig, cm *engine.ConnManager) *Promethe
|
||||
Subsystem: "cache",
|
||||
Name: "items_total",
|
||||
Help: "Total number of cache items",
|
||||
}, []string{"cache"})
|
||||
}, []string{"cache", "node_id"})
|
||||
reg.MustRegister(cacheItemsMetric)
|
||||
statMetrics := prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
@@ -255,24 +254,26 @@ func (pa *PrometheusAgent) updateCacheStats() {
|
||||
return
|
||||
}
|
||||
for _, connID := range pa.cfg.PrometheusAgentCfg().CacheSConns {
|
||||
var cacheStats map[string]*ltcache.CacheStats
|
||||
var reply engine.CacheStatsWithMetadata
|
||||
if err := pa.cm.Call(context.Background(), []string{connID},
|
||||
utils.CacheSv1GetCacheStats,
|
||||
utils.CacheSv1GetStats,
|
||||
&utils.AttrCacheIDsWithAPIOpts{
|
||||
CacheIDs: pa.cfg.PrometheusAgentCfg().CacheIDs,
|
||||
}, &cacheStats); err != nil {
|
||||
}, &reply); err != nil {
|
||||
utils.Logger.Err(fmt.Sprintf(
|
||||
"<%s> failed to retrieve cache stats (connID=%q): %v",
|
||||
utils.PrometheusAgent, connID, err))
|
||||
continue
|
||||
}
|
||||
cacheStats := reply.CacheStatistics
|
||||
nodeID := utils.IfaceAsString(reply.Metadata[utils.NodeID])
|
||||
|
||||
for cacheID, stats := range cacheStats {
|
||||
if stats == nil {
|
||||
continue
|
||||
}
|
||||
pa.cacheGroupsMetric.WithLabelValues(cacheID).Set(float64(stats.Groups))
|
||||
pa.cacheItemsMetric.WithLabelValues(cacheID).Set(float64(stats.Items))
|
||||
pa.cacheGroupsMetric.WithLabelValues(cacheID, nodeID).Set(float64(stats.Groups))
|
||||
pa.cacheItemsMetric.WithLabelValues(cacheID, nodeID).Set(float64(stats.Items))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -90,6 +90,12 @@ func (chSv1 *CacheSv1) GetCacheStats(ctx *context.Context, args *utils.AttrCache
|
||||
return chSv1.cacheS.V1GetCacheStats(ctx, args, rply)
|
||||
}
|
||||
|
||||
// GetStats returns CacheStats filtered by cacheIDs, wrapped in a CacheStatsWithMetadata reply; it delegates to cacheS.V1GetStats.
|
||||
func (chSv1 *CacheSv1) GetStats(ctx *context.Context, args *utils.AttrCacheIDsWithAPIOpts,
|
||||
rply *engine.CacheStatsWithMetadata) error {
|
||||
return chSv1.cacheS.V1GetStats(ctx, args, rply)
|
||||
}
|
||||
|
||||
// PrecacheStatus checks status of active precache processes
|
||||
func (chSv1 *CacheSv1) PrecacheStatus(ctx *context.Context, args *utils.AttrCacheIDsWithAPIOpts, rply *map[string]string) error {
|
||||
return chSv1.cacheS.V1PrecacheStatus(ctx, args, rply)
|
||||
|
||||
@@ -7,7 +7,7 @@ PrometheusAgent
|
||||
|
||||
1. **Core metrics** - collected from configured CGRateS engines via CoreSv1.Status API
|
||||
2. **StatQueue metrics** - values from CGRateS :ref:`StatS <stats>` component, collected via StatSv1.GetQueueFloatMetrics API
|
||||
3. **Cache statistics** - collected from configured :ref:`CacheS <caches>` components via CacheSv1.GetCacheStats API
|
||||
3. **Cache statistics** - collected from configured :ref:`CacheS <caches>` components via CacheSv1.GetStats API
|
||||
|
||||
For core metrics, the agent computes real-time values on each Prometheus scrape request. For StatQueue metrics, it retrieves the current state of the stored StatQueues without additional calculations. For cache statistics, it collects current cache utilization data from the configured cache partitions.
|
||||
|
||||
@@ -103,9 +103,10 @@ The PrometheusAgent exposes the following metrics:
|
||||
go_memstats_alloc_bytes{node_id="e94160b"} 1.1360808e+07
|
||||
|
||||
3. **Cache Metrics** (when caches_conns is configured)
|
||||
- Two separate metrics for cache statistics: ``cgrates_cache_groups_total`` and ``cgrates_cache_items_total`` with cache partition ID label
|
||||
- Two separate metrics for cache statistics: ``cgrates_cache_groups_total`` and ``cgrates_cache_items_total`` with cache partition ID and node_id labels
|
||||
- Obtained from CacheS services on each scrape request
|
||||
- Useful for identifying memory usage patterns and potential performance issues
|
||||
- Includes node_id labels for multi-engine environments, allowing collection from multiple CGRateS engines
|
||||
|
||||
Example of cache metrics output:
|
||||
|
||||
@@ -113,15 +114,15 @@ The PrometheusAgent exposes the following metrics:
|
||||
|
||||
# HELP cgrates_cache_groups_total Total number of cache groups
|
||||
# TYPE cgrates_cache_groups_total gauge
|
||||
cgrates_cache_groups_total{cache="*attribute_filter_indexes"} 2
|
||||
cgrates_cache_groups_total{cache="*charger_profiles"} 0
|
||||
cgrates_cache_groups_total{cache="*rpc_connections"} 0
|
||||
cgrates_cache_groups_total{cache="*attribute_filter_indexes",node_id="dc2cb63"} 2
|
||||
cgrates_cache_groups_total{cache="*charger_profiles",node_id="dc2cb63"} 0
|
||||
cgrates_cache_groups_total{cache="*rpc_connections",node_id="dc2cb63"} 0
|
||||
|
||||
# HELP cgrates_cache_items_total Total number of cache items
|
||||
# TYPE cgrates_cache_items_total gauge
|
||||
cgrates_cache_items_total{cache="*attribute_filter_indexes"} 6
|
||||
cgrates_cache_items_total{cache="*charger_profiles"} 2
|
||||
cgrates_cache_items_total{cache="*rpc_connections"} 1
|
||||
cgrates_cache_items_total{cache="*attribute_filter_indexes",node_id="dc2cb63"} 6
|
||||
cgrates_cache_items_total{cache="*charger_profiles",node_id="dc2cb63"} 2
|
||||
cgrates_cache_items_total{cache="*rpc_connections",node_id="dc2cb63"} 1
|
||||
|
||||
|
||||
How It Works
|
||||
|
||||
@@ -443,6 +443,22 @@ func (chS *CacheS) V1GetCacheStats(ctx *context.Context, args *utils.AttrCacheID
|
||||
return
|
||||
}
|
||||
|
||||
// CacheStatsWithMetadata is the reply type filled by V1GetStats: cache statistics plus metadata about the replying engine.
type CacheStatsWithMetadata struct {
|
||||
// CacheStatistics holds the statistics returned by tCache.GetCacheStats, keyed by cache ID.
CacheStatistics map[string]*ltcache.CacheStats
|
||||
// Metadata carries additional reply information; V1GetStats stores the node ID under utils.NodeID.
Metadata map[string]any
|
||||
}
|
||||
|
||||
// V1GetStats overwrites rply with the statistics of the caches listed in args.CacheIDs
// and a Metadata map holding this engine's NodeID (from the general config) under utils.NodeID.
// It always returns nil.
func (chS *CacheS) V1GetStats(ctx *context.Context, args *utils.AttrCacheIDsWithAPIOpts,
|
||||
rply *CacheStatsWithMetadata) error {
|
||||
*rply = CacheStatsWithMetadata{
|
||||
CacheStatistics: chS.tCache.GetCacheStats(args.CacheIDs),
|
||||
Metadata: map[string]any{
|
||||
// The node ID lets callers tell apart replies coming from different engines.
utils.NodeID: chS.cfg.GeneralCfg().NodeID,
|
||||
},
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (chS *CacheS) V1PrecacheStatus(ctx *context.Context, args *utils.AttrCacheIDsWithAPIOpts, rply *map[string]string) (err error) {
|
||||
if len(args.CacheIDs) == 0 {
|
||||
args.CacheIDs = utils.CachePartitions.AsSlice()
|
||||
|
||||
@@ -61,8 +61,13 @@ func TestPrometheusAgentIT(t *testing.T) {
|
||||
"prometheus_agent": {
|
||||
"enabled": true,
|
||||
"path": "/metrics",
|
||||
"collect_go_metrics": false,
|
||||
"collect_process_metrics": false,
|
||||
"caches_conns": ["*localhost", "external"],
|
||||
"cache_ids": [
|
||||
"*statqueue_profiles",
|
||||
"*statqueues",
|
||||
"*stat_filter_indexes",
|
||||
"*rpc_connections"
|
||||
],
|
||||
"stats_conns": ["*localhost", "external"],
|
||||
"stat_queue_ids": ["cgrates.org:SQ_1","SQ_2"]
|
||||
}
|
||||
|
||||
@@ -2010,6 +2010,7 @@ const (
|
||||
const (
|
||||
CacheSv1 = "CacheSv1"
|
||||
CacheSv1GetCacheStats = "CacheSv1.GetCacheStats"
|
||||
CacheSv1GetStats = "CacheSv1.GetStats"
|
||||
CacheSv1GetItemIDs = "CacheSv1.GetItemIDs"
|
||||
CacheSv1HasItem = "CacheSv1.HasItem"
|
||||
CacheSv1GetItem = "CacheSv1.GetItem"
|
||||
|
||||
Reference in New Issue
Block a user