Revise memory profiling implementation

- merge StopChanMemProf with StopMemoryProfiling
- remove fileMEM and stopMemProf from struct and constructors
- add separate mutex for memory profiling, ensure thread safety
- handle all significant errors
- log error if StopMemoryProfiling fails during CoreS Shutdown
- ignore errors in Shutdown and deferred Stop if profiling is inactive
- move validations inside V1 functions
- return error if StartMemoryProfiling already started
- return error if StopMemoryProfiling already stopped or never started
- on error, stop only the profiling loop instead of shutting down cgr-engine
- StopMemoryProfiling closes channel and profiling loop writes final profile
- rename Path to DirPath for mandatory field error
- rename memprof_nrfiles flag to memprof_maxfiles
- increase default memprof_interval from 5s to 15s
- consider MaxFiles <= 0 as unlimited
- move memory profiling logic after starting services
- use CoreService Start/StopMemoryProfiling in main
- remove final memory profile block (final profile is now written by the deferred StopMemoryProfiling)
- convert MemProfiling to method on CoreService and rename to profileMemory
- use Ticker for recurrent actions instead of Timer
- compute mem_final.prof full path in StartMemoryProfiling
- suffix profile files with current time instead of numbers
- update dispatcher methods after changes
- move MemoryPrf from utils to cores, rename to MemoryProfilingParams
- add logs for starting/stopping profiling
This commit is contained in:
ionutboangiu
2024-07-22 08:20:26 +03:00
committed by Dan Christian Bogos
parent 9d4561f79c
commit 1c490a9020
14 changed files with 271 additions and 239 deletions

View File

@@ -25,7 +25,6 @@ import (
"log"
"os"
"os/signal"
"path"
"path/filepath"
"runtime"
"runtime/pprof"
@@ -59,8 +58,8 @@ var (
httpPprof = cgrEngineFlags.Bool(utils.HttpPprofCgr, false, "Enable HTTP pprof profiling")
cpuProfDir = cgrEngineFlags.String(utils.CpuProfDirCgr, utils.EmptyString, "Directory for CPU profiles")
memProfDir = cgrEngineFlags.String(utils.MemProfDirCgr, utils.EmptyString, "Directory for memory profiles")
memProfInterval = cgrEngineFlags.Duration(utils.MemProfIntervalCgr, 5*time.Second, "Interval between memory profile saves")
memProfNrFiles = cgrEngineFlags.Int(utils.MemProfNrFilesCgr, 1, "Number of memory profiles to keep (most recent)")
memProfInterval = cgrEngineFlags.Duration(utils.MemProfIntervalCgr, 15*time.Second, "Interval between memory profile saves")
memProfMaxFiles = cgrEngineFlags.Int(utils.MemProfMaxFilesCgr, 1, "Number of memory profiles to keep (most recent)")
scheduledShutdown = cgrEngineFlags.Duration(utils.ScheduledShutdownCgr, 0, "Shutdown the engine after the specified duration")
singleCPU = cgrEngineFlags.Bool(utils.SingleCpuCgr, false, "Run on a single CPU core")
syslogger = cgrEngineFlags.String(utils.LoggerCfg, utils.EmptyString, "Logger type <*syslog|*stdout>")
@@ -355,20 +354,6 @@ func main() {
go singnalHandler(shdWg, shdChan)
var cS *cores.CoreService
var stopMemProf chan struct{}
var memPrfDirForCores string
if *memProfDir != utils.EmptyString {
shdWg.Add(1)
stopMemProf = make(chan struct{})
memPrfDirForCores = *memProfDir
go cores.MemProfiling(*memProfDir, *memProfInterval, *memProfNrFiles, shdWg, stopMemProf, shdChan)
defer func() {
if cS == nil {
close(stopMemProf)
}
}()
}
var cpuProf io.Closer
if *cpuProfDir != utils.EmptyString {
cpuPath := filepath.Join(*cpuProfDir, utils.CpuPathCgr)
@@ -409,7 +394,6 @@ func main() {
cfg, err = config.NewCGRConfigFromPath(*cfgPath)
if err != nil {
log.Fatalf("Could not parse config: <%s>", err.Error())
return
}
if *nodeID != utils.EmptyString {
@@ -422,7 +406,6 @@ func main() {
if utils.Logger, err = utils.Newlogger(utils.FirstNonEmpty(*syslogger,
cfg.GeneralCfg().Logger), cfg.GeneralCfg().NodeID); err != nil {
log.Fatalf("Could not initialize syslog connection, err: <%s>", err.Error())
return
}
lgLevel := cfg.GeneralCfg().LogLevel
if *logLevel != -1 { // Modify the log level if provided by command arguments
@@ -580,7 +563,7 @@ func main() {
// init CoreSv1
coreS := services.NewCoreService(cfg, caps, server, internalCoreSv1Chan, anz, cpuProf, *memProfDir, shdWg, stopMemProf, shdChan, srvDep)
coreS := services.NewCoreService(cfg, caps, server, internalCoreSv1Chan, anz, cpuProf, shdWg, shdChan, srvDep)
shdWg.Add(1)
if err := coreS.Start(); err != nil {
log.Fatalf("<%s> error received: <%s>, exiting!", utils.InitS, err.Error())
@@ -710,6 +693,18 @@ func main() {
internalDispatcherSChan, internalLoaderSChan, internalRALsChan,
internalCacheSChan, internalEEsChan, internalERsChan, shdChan)
if *memProfDir != utils.EmptyString {
if err := cS.StartMemoryProfiling(cores.MemoryProfilingParams{
DirPath: *memProfDir,
Interval: *memProfInterval,
MaxFiles: *memProfMaxFiles,
}); err != nil {
utils.Logger.Err(fmt.Sprintf("<%s> %v", utils.CoreS, err))
return
}
defer cS.StopMemoryProfiling() // safe to ignore error (irrelevant)
}
<-shdChan.Done()
shtdDone := make(chan struct{})
go func() {
@@ -723,9 +718,6 @@ func main() {
utils.ServiceManager))
}
if *memProfDir != utils.EmptyString { // write last memory profiling
cores.MemProfFile(path.Join(*memProfDir, utils.MemProfFileCgr))
}
if *pidFile != utils.EmptyString {
if err := os.Remove(*pidFile); err != nil {
utils.Logger.Warning("Could not remove pid file: " + err.Error())

View File

@@ -88,13 +88,13 @@ func TestCgrEngineFlags(t *testing.T) {
name: "memProfInterval",
flags: []string{"-memprof_interval", "1s"},
flagVar: memProfInterval,
defaultVal: 5 * time.Second,
defaultVal: 15 * time.Second,
want: time.Second,
},
{
name: "memProfNrFiles",
flags: []string{"-memprof_nrfiles", "3"},
flagVar: memProfNrFiles,
name: "memProfMaxFiles",
flags: []string{"-memprof_maxfiles", "3"},
flagVar: memProfMaxFiles,
defaultVal: 1,
want: 3,
},