mirror of
https://github.com/cgrates/cgrates.git
synced 2026-02-11 18:16:24 +05:00
prometheus: fetch all StatQueues when IDs list is empty
This commit is contained in:
committed by
Dan Christian Bogos
parent
d5f49ba1a2
commit
e019aeabab
@@ -216,11 +216,23 @@ func (pa *PrometheusAgent) ServeHTTP(w http.ResponseWriter, r *http.Request) {
|
||||
// updateStatsMetrics fetches and updates all StatQueue metrics by calling each
|
||||
// configured StatS connection.
|
||||
func (pa *PrometheusAgent) updateStatsMetrics() {
|
||||
if len(pa.cfg.PrometheusAgentCfg().StatQueueIDs) == 0 {
|
||||
return
|
||||
}
|
||||
for _, connID := range pa.cfg.PrometheusAgentCfg().StatSConns {
|
||||
for _, sqID := range pa.cfg.PrometheusAgentCfg().StatQueueIDs {
|
||||
for connIdx, connID := range pa.cfg.PrometheusAgentCfg().StatSConns {
|
||||
sqIDs := pa.cfg.PrometheusAgentCfg().StatQueueIDs
|
||||
|
||||
// When no StatQueueIDs set, fetch all available ones.
|
||||
if len(sqIDs) == 0 {
|
||||
adminsConnID := pa.cfg.PrometheusAgentCfg().AdminSConns[connIdx]
|
||||
if err := pa.cm.Call(context.Background(), []string{adminsConnID},
|
||||
utils.AdminSv1GetStatQueueProfileIDs,
|
||||
&utils.ArgsItemIDs{}, &sqIDs); err != nil {
|
||||
utils.Logger.Err(fmt.Sprintf(
|
||||
"<%s> failed to retrieve all StatQueue IDs (connID=%q): %v",
|
||||
utils.PrometheusAgent, adminsConnID, err))
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
for _, sqID := range sqIDs {
|
||||
|
||||
tenantID := utils.NewTenantID(sqID)
|
||||
if tenantID.Tenant == "" {
|
||||
|
||||
@@ -1089,6 +1089,7 @@ const CGRATES_CFG_JSON = `
|
||||
"prometheus_agent": {
|
||||
"enabled": false, // enables the prometheus agent: <true|false>
|
||||
"path": "/prometheus", // endpoint for prometheus metrics
|
||||
"admins_conns": [], // connections to AdminS, empty to disable: <""|*internal|$rpc_conns_id>
|
||||
"caches_conns": [], // connections to CacheS, empty to disable: <""|*internal|$rpc_conns_id>
|
||||
"cache_ids": [], // cache partition IDs to collect statistics for, empty for all partitions
|
||||
"cores_conns": [], // connections to CoreS, empty to disable: <""|*internal|$rpc_conns_id>
|
||||
|
||||
@@ -1253,8 +1253,46 @@ func (cfg *CGRConfig) checkConfigSanity() error {
|
||||
return fmt.Errorf("<%s> the CleanupInterval needs to be bigger than 0", utils.AnalyzerS)
|
||||
}
|
||||
}
|
||||
if err := cfg.prometheusAgentCfg.validate(cfg); err != nil {
|
||||
return err
|
||||
if cfg.prometheusAgentCfg.Enabled {
|
||||
if len(cfg.prometheusAgentCfg.StatSConns) > 0 &&
|
||||
len(cfg.prometheusAgentCfg.StatQueueIDs) == 0 &&
|
||||
len(cfg.prometheusAgentCfg.StatSConns) != len(cfg.prometheusAgentCfg.AdminSConns) {
|
||||
return fmt.Errorf(
|
||||
"<%s> when StatQueueIDs is empty, admins_conns must match stats_conns length to fetch StatQueue IDs",
|
||||
utils.PrometheusAgent)
|
||||
}
|
||||
for _, connID := range cfg.prometheusAgentCfg.AdminSConns {
|
||||
if strings.HasPrefix(connID, utils.MetaInternal) && !cfg.admS.Enabled {
|
||||
return fmt.Errorf("<%s> not enabled but requested by <%s> component", utils.AdminS, utils.PrometheusAgent)
|
||||
}
|
||||
if _, has := cfg.rpcConns[connID]; !has && !strings.HasPrefix(connID, utils.MetaInternal) {
|
||||
return fmt.Errorf("<%s> connection with id: <%s> not defined", utils.PrometheusAgent, connID)
|
||||
}
|
||||
}
|
||||
for _, connID := range cfg.prometheusAgentCfg.CacheSConns {
|
||||
if _, has := cfg.rpcConns[connID]; !has && !strings.HasPrefix(connID, utils.MetaInternal) {
|
||||
return fmt.Errorf("<%s> connection with id: <%s> not defined", utils.PrometheusAgent, connID)
|
||||
}
|
||||
}
|
||||
for _, connID := range cfg.prometheusAgentCfg.CoreSConns {
|
||||
if _, has := cfg.rpcConns[connID]; !has && !strings.HasPrefix(connID, utils.MetaInternal) {
|
||||
return fmt.Errorf("<%s> connection with id: <%s> not defined", utils.PrometheusAgent, connID)
|
||||
}
|
||||
}
|
||||
for _, connID := range cfg.prometheusAgentCfg.StatSConns {
|
||||
if strings.HasPrefix(connID, utils.MetaInternal) && !cfg.statsCfg.Enabled {
|
||||
return fmt.Errorf("<%s> not enabled but requested by <%s> component", utils.StatService, utils.PrometheusAgent)
|
||||
}
|
||||
if _, has := cfg.rpcConns[connID]; !has && !strings.HasPrefix(connID, utils.MetaInternal) {
|
||||
return fmt.Errorf("<%s> connection with id: <%s> not defined", utils.PrometheusAgent, connID)
|
||||
}
|
||||
}
|
||||
if len(cfg.prometheusAgentCfg.CoreSConns) > 0 {
|
||||
if cfg.prometheusAgentCfg.CollectGoMetrics || cfg.prometheusAgentCfg.CollectProcessMetrics {
|
||||
return fmt.Errorf("<%s> collect_go_metrics and collect_process_metrics cannot be enabled when using CoreSConns",
|
||||
utils.PrometheusAgent)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
|
||||
@@ -19,9 +19,7 @@ along with this program. If not, see <https://www.gnu.org/licenses/>
|
||||
package config
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"slices"
|
||||
"strings"
|
||||
|
||||
"github.com/cgrates/birpc/context"
|
||||
"github.com/cgrates/cgrates/utils"
|
||||
@@ -33,6 +31,7 @@ type PrometheusAgentJsonCfg struct {
|
||||
Path *string `json:"path"`
|
||||
CollectGoMetrics *bool `json:"collect_go_metrics"`
|
||||
CollectProcessMetrics *bool `json:"collect_process_metrics"`
|
||||
AdminSConns *[]string `json:"admins_conns"`
|
||||
CacheSConns *[]string `json:"caches_conns"`
|
||||
CacheIDs *[]string `json:"cache_ids"`
|
||||
CoreSConns *[]string `json:"cores_conns"`
|
||||
@@ -46,6 +45,7 @@ type PrometheusAgentCfg struct {
|
||||
Path string
|
||||
CollectGoMetrics bool
|
||||
CollectProcessMetrics bool
|
||||
AdminSConns []string
|
||||
CacheSConns []string
|
||||
CacheIDs []string
|
||||
CoreSConns []string
|
||||
@@ -78,6 +78,9 @@ func (c *PrometheusAgentCfg) loadFromJSONCfg(jc *PrometheusAgentJsonCfg) error {
|
||||
if jc.CollectProcessMetrics != nil {
|
||||
c.CollectProcessMetrics = *jc.CollectProcessMetrics
|
||||
}
|
||||
if jc.AdminSConns != nil {
|
||||
c.AdminSConns = tagInternalConns(*jc.AdminSConns, utils.MetaAdminS)
|
||||
}
|
||||
if jc.CacheSConns != nil {
|
||||
c.CacheSConns = tagInternalConns(*jc.CacheSConns, utils.MetaCaches)
|
||||
}
|
||||
@@ -103,6 +106,7 @@ func (c PrometheusAgentCfg) AsMapInterface() any {
|
||||
utils.PathCfg: c.Path,
|
||||
utils.CollectGoMetricsCfg: c.CollectGoMetrics,
|
||||
utils.CollectProcessMetricsCfg: c.CollectProcessMetrics,
|
||||
utils.AdminSConnsCfg: stripInternalConns(c.AdminSConns),
|
||||
utils.CacheSConnsCfg: stripInternalConns(c.CacheSConns),
|
||||
utils.CacheIDsCfg: stripInternalConns(c.CacheIDs),
|
||||
utils.CoreSConnsCfg: stripInternalConns(c.CoreSConns),
|
||||
@@ -121,6 +125,7 @@ func (c PrometheusAgentCfg) Clone() *PrometheusAgentCfg {
|
||||
Path: c.Path,
|
||||
CollectGoMetrics: c.CollectGoMetrics,
|
||||
CollectProcessMetrics: c.CollectProcessMetrics,
|
||||
AdminSConns: slices.Clone(c.AdminSConns),
|
||||
CacheSConns: slices.Clone(c.CacheSConns),
|
||||
CacheIDs: slices.Clone(c.CacheIDs),
|
||||
CoreSConns: slices.Clone(c.CoreSConns),
|
||||
@@ -129,37 +134,6 @@ func (c PrometheusAgentCfg) Clone() *PrometheusAgentCfg {
|
||||
}
|
||||
}
|
||||
|
||||
func (c PrometheusAgentCfg) validate(cfg *CGRConfig) error {
|
||||
if !c.Enabled {
|
||||
return nil
|
||||
}
|
||||
for _, connID := range cfg.prometheusAgentCfg.CacheSConns {
|
||||
if _, has := cfg.rpcConns[connID]; !has && !strings.HasPrefix(connID, utils.MetaInternal) {
|
||||
return fmt.Errorf("<%s> connection with id: <%s> not defined", utils.PrometheusAgent, connID)
|
||||
}
|
||||
}
|
||||
for _, connID := range cfg.prometheusAgentCfg.CoreSConns {
|
||||
if _, has := cfg.rpcConns[connID]; !has && !strings.HasPrefix(connID, utils.MetaInternal) {
|
||||
return fmt.Errorf("<%s> connection with id: <%s> not defined", utils.PrometheusAgent, connID)
|
||||
}
|
||||
}
|
||||
for _, connID := range c.StatSConns {
|
||||
if strings.HasPrefix(connID, utils.MetaInternal) && !cfg.statsCfg.Enabled {
|
||||
return fmt.Errorf("<%s> not enabled but requested by <%s> component", utils.StatService, utils.PrometheusAgent)
|
||||
}
|
||||
if _, has := cfg.rpcConns[connID]; !has && !strings.HasPrefix(connID, utils.MetaInternal) {
|
||||
return fmt.Errorf("<%s> connection with id: <%s> not defined", utils.PrometheusAgent, connID)
|
||||
}
|
||||
}
|
||||
if len(c.CoreSConns) > 0 {
|
||||
if c.CollectGoMetrics || c.CollectProcessMetrics {
|
||||
return fmt.Errorf("<%s> collect_go_metrics and collect_process_metrics cannot be enabled when using CoreSConns",
|
||||
utils.PrometheusAgent)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func diffPrometheusAgentJsonCfg(d *PrometheusAgentJsonCfg, v1, v2 *PrometheusAgentCfg) *PrometheusAgentJsonCfg {
|
||||
if d == nil {
|
||||
d = new(PrometheusAgentJsonCfg)
|
||||
@@ -177,6 +151,9 @@ func diffPrometheusAgentJsonCfg(d *PrometheusAgentJsonCfg, v1, v2 *PrometheusAge
|
||||
if v1.CollectProcessMetrics != v2.CollectProcessMetrics && true {
|
||||
d.CollectProcessMetrics = utils.BoolPointer(v2.CollectProcessMetrics)
|
||||
}
|
||||
if !slices.Equal(v1.AdminSConns, v2.AdminSConns) {
|
||||
d.AdminSConns = utils.SliceStringPointer(v2.AdminSConns)
|
||||
}
|
||||
if !slices.Equal(v1.CoreSConns, v2.CoreSConns) {
|
||||
d.CoreSConns = utils.SliceStringPointer(v2.CoreSConns)
|
||||
}
|
||||
|
||||
@@ -21,6 +21,7 @@ Example configuration in the JSON file:
|
||||
"prometheus_agent": {
|
||||
"enabled": true,
|
||||
"path": "/prometheus",
|
||||
"admins_conns": ["*internal", "external"],
|
||||
"caches_conns": ["*internal"],
|
||||
"cache_ids": [
|
||||
"*attribute_filter_indexes",
|
||||
@@ -43,6 +44,9 @@ enabled
|
||||
path
|
||||
HTTP endpoint path where Prometheus metrics will be exposed, e.g., "/prometheus" or "/metrics"
|
||||
|
||||
admins_conns
|
||||
List of connection IDs to AdminS components. Required when stat_queue_ids is empty, in order to fetch all available StatQueue profile IDs; in that case its length must match the length of stats_conns. Possible values: <""|*internal|$rpc_conns_id>
|
||||
|
||||
caches_conns
|
||||
List of connection IDs to CacheS components for collecting cache statistics. Empty list disables cache metrics collection. Possible values: <""|*internal|$rpc_conns_id>
|
||||
|
||||
@@ -56,7 +60,7 @@ stats_conns
|
||||
List of connection IDs to StatS components for collecting StatQueue metrics. Empty list disables StatQueue metrics collection. Possible values: <""|*internal|$rpc_conns_id>
|
||||
|
||||
stat_queue_ids
|
||||
List of StatQueue IDs to collect metrics from. Can include tenant in format <[tenant]:ID>. If tenant is not specified, default tenant from general configuration is used.
|
||||
List of StatQueue IDs to collect metrics from. Can include tenant in format <[tenant]:ID>. If tenant is not specified, default tenant from general configuration is used. Leave empty to automatically collect metrics from all available StatQueues (requires admins_conns).
|
||||
|
||||
Available Metrics
|
||||
-----------------
|
||||
@@ -135,6 +139,11 @@ The PrometheusAgent operates differently than other CGRateS components that use
|
||||
- When multiple connections are configured in caches_conns, the agent collects cache statistics from **all** connections for the specified cache_ids
|
||||
- The agent processes metrics requests only when Prometheus sends a scrape request to the configured HTTP endpoint
|
||||
|
||||
StatQueue metrics are collected based on the ``stat_queue_ids`` configuration. When specific StatQueue IDs are provided, only those StatQueues are monitored. When ``stat_queue_ids`` is left empty, all available StatQueues are monitored by fetching StatQueue profile IDs from the configured ``admins_conns``.
|
||||
|
||||
.. note::
|
||||
When fetching all StatQueues (empty stat_queue_ids), each AdminS connection in ``admins_conns`` corresponds to its StatS counterpart at the same index position in ``stats_conns``.
|
||||
|
||||
You can view all exported metrics and see what Prometheus would scrape by making a simple curl request to the HTTP endpoint:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
@@ -24,7 +24,6 @@ import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"math/rand"
|
||||
"net/http"
|
||||
"testing"
|
||||
"time"
|
||||
@@ -76,7 +75,8 @@ func TestPrometheusAgentIT(t *testing.T) {
|
||||
"*stat_filter_indexes",
|
||||
"*rpc_connections"
|
||||
],
|
||||
"stats_conns": ["*localhost", "external"],
|
||||
// "apiers_conns": ["*internal", "external"],
|
||||
"stats_conns": ["*internal", "external"],
|
||||
"stat_queue_ids": ["cgrates.org:SQ_1","SQ_2"]
|
||||
}
|
||||
}`
|
||||
@@ -169,8 +169,8 @@ func processStats(t *testing.T, client *birpc.Client) {
|
||||
ID: utils.GenUUID(),
|
||||
Event: map[string]any{},
|
||||
APIOpts: map[string]any{
|
||||
utils.MetaUsage: time.Duration(rand.Intn(3600)+60) * time.Second,
|
||||
utils.MetaCost: rand.Float64()*20 + 0.1,
|
||||
utils.MetaUsage: time.Duration(i) * time.Second,
|
||||
utils.MetaCost: i * 10,
|
||||
utils.OptsStatsProfileIDs: fmt.Sprintf("SQ_%d", i+1),
|
||||
},
|
||||
}, &reply); err != nil {
|
||||
|
||||
Reference in New Issue
Block a user