prometheus: use apiers_conns for StatQueue fetch

This commit is contained in:
ionutboangiu
2025-09-19 17:56:01 +03:00
committed by Dan Christian Bogos
parent a0ced56275
commit bb7e7572df
7 changed files with 33 additions and 16 deletions

View File

@@ -21,7 +21,6 @@ package agents
import ( import (
"fmt" "fmt"
"net/http" "net/http"
"strings"
"github.com/cgrates/birpc/context" "github.com/cgrates/birpc/context"
"github.com/cgrates/cgrates/config" "github.com/cgrates/cgrates/config"
@@ -217,20 +216,12 @@ func (pa *PrometheusAgent) ServeHTTP(w http.ResponseWriter, r *http.Request) {
// updateStatsMetrics fetches and updates all StatQueue metrics by calling each // updateStatsMetrics fetches and updates all StatQueue metrics by calling each
// configured StatS connection. // configured StatS connection.
func (pa *PrometheusAgent) updateStatsMetrics() { func (pa *PrometheusAgent) updateStatsMetrics() {
for _, connID := range pa.cfg.PrometheusAgentCfg().StatSConns { for connIdx, connID := range pa.cfg.PrometheusAgentCfg().StatSConns {
sqIDs := pa.cfg.PrometheusAgentCfg().StatQueueIDs sqIDs := pa.cfg.PrometheusAgentCfg().StatQueueIDs
// When no StatQueueIDs set, fetch all available ones. // When no StatQueueIDs set, fetch all available ones.
if len(sqIDs) == 0 { if len(sqIDs) == 0 {
apiersConnID := pa.cfg.PrometheusAgentCfg().ApierSConns[connIdx]
// Internal StatS connections cannot handle APIerS calls.
// Redirect *internal:*stats to *internal:*apier to get StatQueue IDs.
apiersConnID := connID
if strings.HasPrefix(connID, utils.MetaInternal) {
apiersConnID = utils.ConcatenatedKey(utils.MetaInternal,
utils.MetaApier)
}
if err := pa.cm.Call(context.Background(), []string{apiersConnID}, if err := pa.cm.Call(context.Background(), []string{apiersConnID},
utils.APIerSv1GetStatQueueProfileIDs, utils.APIerSv1GetStatQueueProfileIDs,
&utils.PaginatorWithTenant{}, &sqIDs); err != nil { &utils.PaginatorWithTenant{}, &sqIDs); err != nil {

View File

@@ -841,6 +841,7 @@ const CGRATES_CFG_JSON = `
"prometheus_agent": { "prometheus_agent": {
"enabled": false, // enables the prometheus agent: <true|false> "enabled": false, // enables the prometheus agent: <true|false>
"path": "/prometheus", // endpoint for prometheus metrics "path": "/prometheus", // endpoint for prometheus metrics
"apiers_conns": [], // connections to ApierS, empty to disable: <""|*internal|$rpc_conns_id>
"caches_conns": [], // connections to CacheS, empty to disable: <""|*internal|$rpc_conns_id> "caches_conns": [], // connections to CacheS, empty to disable: <""|*internal|$rpc_conns_id>
"cache_ids": [], // cache partition IDs to collect statistics for, empty for all partitions "cache_ids": [], // cache partition IDs to collect statistics for, empty for all partitions
"cores_conns": [], // connections to CoreS, empty to disable: <""|*internal|$rpc_conns_id> "cores_conns": [], // connections to CoreS, empty to disable: <""|*internal|$rpc_conns_id>

File diff suppressed because one or more lines are too long

View File

@@ -1341,10 +1341,18 @@ func (cfg *CGRConfig) checkConfigSanity() error {
if cfg.prometheusAgentCfg.Enabled { if cfg.prometheusAgentCfg.Enabled {
if len(cfg.prometheusAgentCfg.StatSConns) > 0 && if len(cfg.prometheusAgentCfg.StatSConns) > 0 &&
len(cfg.prometheusAgentCfg.StatQueueIDs) == 0 && len(cfg.prometheusAgentCfg.StatQueueIDs) == 0 &&
!cfg.apier.Enabled { len(cfg.prometheusAgentCfg.StatSConns) != len(cfg.prometheusAgentCfg.ApierSConns) {
return fmt.Errorf( return fmt.Errorf(
"<%s> when StatQueueIDs is empty, %s must be enabled to retrieve all available StatQueue IDs", "<%s> when StatQueueIDs is empty, apiers_conns must match stats_conns length to fetch StatQueue IDs",
utils.PrometheusAgent, utils.ApierS) utils.PrometheusAgent)
}
for _, connID := range cfg.prometheusAgentCfg.ApierSConns {
if strings.HasPrefix(connID, utils.MetaInternal) && !cfg.apier.Enabled {
return fmt.Errorf("<%s> not enabled but requested by <%s> component", utils.ApierS, utils.PrometheusAgent)
}
if _, has := cfg.rpcConns[connID]; !has && !strings.HasPrefix(connID, utils.MetaInternal) {
return fmt.Errorf("<%s> connection with id: <%s> not defined", utils.PrometheusAgent, connID)
}
} }
for _, connID := range cfg.prometheusAgentCfg.StatSConns { for _, connID := range cfg.prometheusAgentCfg.StatSConns {
if strings.HasPrefix(connID, utils.MetaInternal) && !cfg.statsCfg.Enabled { if strings.HasPrefix(connID, utils.MetaInternal) && !cfg.statsCfg.Enabled {

View File

@@ -33,6 +33,7 @@ type PrometheusAgentJsonCfg struct {
CacheSConns *[]string `json:"caches_conns"` CacheSConns *[]string `json:"caches_conns"`
CacheIDs *[]string `json:"cache_ids"` CacheIDs *[]string `json:"cache_ids"`
CoreSConns *[]string `json:"cores_conns"` CoreSConns *[]string `json:"cores_conns"`
ApierSConns *[]string `json:"apiers_conns"`
StatSConns *[]string `json:"stats_conns"` StatSConns *[]string `json:"stats_conns"`
StatQueueIDs *[]string `json:"stat_queue_ids"` StatQueueIDs *[]string `json:"stat_queue_ids"`
} }
@@ -46,6 +47,7 @@ type PrometheusAgentCfg struct {
CacheSConns []string CacheSConns []string
CacheIDs []string CacheIDs []string
CoreSConns []string CoreSConns []string
ApierSConns []string
StatSConns []string StatSConns []string
StatQueueIDs []string StatQueueIDs []string
} }
@@ -75,6 +77,9 @@ func (c *PrometheusAgentCfg) loadFromJSONCfg(jc *PrometheusAgentJsonCfg) error {
if jc.CoreSConns != nil { if jc.CoreSConns != nil {
c.CoreSConns = tagInternalConns(*jc.CoreSConns, utils.MetaCore) c.CoreSConns = tagInternalConns(*jc.CoreSConns, utils.MetaCore)
} }
if jc.ApierSConns != nil {
c.ApierSConns = tagInternalConns(*jc.ApierSConns, utils.MetaApier)
}
if jc.StatSConns != nil { if jc.StatSConns != nil {
c.StatSConns = tagInternalConns(*jc.StatSConns, utils.MetaStats) c.StatSConns = tagInternalConns(*jc.StatSConns, utils.MetaStats)
} }
@@ -94,6 +99,7 @@ func (c PrometheusAgentCfg) AsMapInterface() any {
utils.CacheSConnsCfg: stripInternalConns(c.CacheSConns), utils.CacheSConnsCfg: stripInternalConns(c.CacheSConns),
utils.CacheIDsCfg: stripInternalConns(c.CacheIDs), utils.CacheIDsCfg: stripInternalConns(c.CacheIDs),
utils.CoreSConnsCfg: stripInternalConns(c.CoreSConns), utils.CoreSConnsCfg: stripInternalConns(c.CoreSConns),
utils.ApierSConnsCfg: stripInternalConns(c.ApierSConns),
utils.StatSConnsCfg: stripInternalConns(c.StatSConns), utils.StatSConnsCfg: stripInternalConns(c.StatSConns),
utils.StatQueueIDsCfg: c.StatQueueIDs, utils.StatQueueIDsCfg: c.StatQueueIDs,
} }
@@ -109,6 +115,7 @@ func (c PrometheusAgentCfg) Clone() *PrometheusAgentCfg {
CacheSConns: slices.Clone(c.CacheSConns), CacheSConns: slices.Clone(c.CacheSConns),
CacheIDs: slices.Clone(c.CacheIDs), CacheIDs: slices.Clone(c.CacheIDs),
CoreSConns: slices.Clone(c.CoreSConns), CoreSConns: slices.Clone(c.CoreSConns),
ApierSConns: slices.Clone(c.ApierSConns),
StatSConns: slices.Clone(c.StatSConns), StatSConns: slices.Clone(c.StatSConns),
StatQueueIDs: slices.Clone(c.StatQueueIDs), StatQueueIDs: slices.Clone(c.StatQueueIDs),
} }

View File

@@ -21,6 +21,7 @@ Example configuration in the JSON file:
"prometheus_agent": { "prometheus_agent": {
"enabled": true, "enabled": true,
"path": "/prometheus", "path": "/prometheus",
"apiers_conns": ["*internal", "external"],
"caches_conns": ["*internal"], "caches_conns": ["*internal"],
"cache_ids": [ "cache_ids": [
"*attribute_filter_indexes", "*attribute_filter_indexes",
@@ -43,6 +44,9 @@ enabled
path path
HTTP endpoint path where Prometheus metrics will be exposed, e.g., "/prometheus" or "/metrics" HTTP endpoint path where Prometheus metrics will be exposed, e.g., "/prometheus" or "/metrics"
apiers_conns
List of connection IDs to ApierS components. Required when stat_queue_ids is empty: the agent then queries these connections to fetch the IDs of all available StatQueue profiles. In that case the list must contain the same number of entries as stats_conns, because the two lists are paired by index. Possible values: <""|*internal|$rpc_conns_id>
caches_conns caches_conns
List of connection IDs to CacheS components for collecting cache statistics. Empty list disables cache metrics collection. Possible values: <""|*internal|$rpc_conns_id> List of connection IDs to CacheS components for collecting cache statistics. Empty list disables cache metrics collection. Possible values: <""|*internal|$rpc_conns_id>
@@ -56,7 +60,7 @@ stats_conns
List of connection IDs to StatS components for collecting StatQueue metrics. Empty list disables StatQueue metrics collection. Possible values: <""|*internal|$rpc_conns_id> List of connection IDs to StatS components for collecting StatQueue metrics. Empty list disables StatQueue metrics collection. Possible values: <""|*internal|$rpc_conns_id>
stat_queue_ids stat_queue_ids
List of StatQueue IDs to collect metrics from. Can include tenant in format <[tenant]:ID>. If tenant is not specified, default tenant from general configuration is used. List of StatQueue IDs to collect metrics from. Can include tenant in format <[tenant]:ID>. If tenant is not specified, default tenant from general configuration is used. Leave empty to automatically collect metrics from all available StatQueues (requires apiers_conns).
Available Metrics Available Metrics
----------------- -----------------
@@ -135,6 +139,11 @@ The PrometheusAgent operates differently than other CGRateS components that use
- When multiple connections are configured in caches_conns, the agent collects cache statistics from **all** connections for the specified cache_ids - When multiple connections are configured in caches_conns, the agent collects cache statistics from **all** connections for the specified cache_ids
- The agent processes metrics requests only when Prometheus sends a scrape request to the configured HTTP endpoint - The agent processes metrics requests only when Prometheus sends a scrape request to the configured HTTP endpoint
StatQueue metrics are collected based on the ``stat_queue_ids`` configuration. When specific StatQueue IDs are provided, only those StatQueues are monitored. When ``stat_queue_ids`` is left empty, all available StatQueues are monitored by fetching StatQueue profile IDs from the configured ``apiers_conns``.
.. note::
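When fetching all StatQueues (empty ``stat_queue_ids``), connections are paired by position: the ApierS connection at a given index in ``apiers_conns`` is used to fetch the StatQueue IDs when processing the StatS connection at the same index in ``stats_conns``. Both lists must therefore have the same length; otherwise the configuration sanity check fails.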
You can view all exported metrics and see what Prometheus would scrape by making a simple curl request to the HTTP endpoint: You can view all exported metrics and see what Prometheus would scrape by making a simple curl request to the HTTP endpoint:
.. code-block:: bash .. code-block:: bash

View File

@@ -67,6 +67,7 @@ func TestPrometheusAgentIT(t *testing.T) {
"*stat_filter_indexes", "*stat_filter_indexes",
"*rpc_connections" "*rpc_connections"
], ],
// "apiers_conns": ["*internal", "external"],
"stats_conns": ["*internal", "external"], "stats_conns": ["*internal", "external"],
"stat_queue_ids": ["cgrates.org:SQ_1","SQ_2"] "stat_queue_ids": ["cgrates.org:SQ_1","SQ_2"]
} }