Files
cgrates/services/cores.go
ionutboangiu c7dbcaea03 Revise CPU/Memory profiling
CPU profiling changes:

cgr-engine.go:
- use filepath.Join instead of path.Join
- handle *CoreService.StopCPUProfiling error inside deferred function
- same with the error from *os.File.Close()

cores/core.go:
- StartCPUProfile now returns an *os.File (as opposed to an io.WriteCloser),
  because os.File.Stat is used beforehand to check if a handler of the file is
  already active and confirm the status of profiling.  Asserting the type would
  have worked as well.
- handle pprof.StartCPUProfile error and ensure file is closed before returning
- log file close error as a warning if it occurs
- return missing mandatory error with correct path field name ('DirPath')
- no need to check if fileCPU is nil for profiling status
  - pprof.StartCPUProfile will return an error if profiling is already started
  - os.File.Close() will return ErrClosed if profiling is already stopped
- differentiate between calling StopCPUProfiling when profiling hasn't started
and when it was already stopped by returning appropriate errors

Memory profiling changes:

- merge StopChanMemProf with StopMemoryProfiling
- remove fileMEM and stopMemProf from struct and constructors
- add separate mutex for memory profiling, ensure thread safety
- handle all significant errors
- log error if StopMemoryProfiling fails during CoreS Shutdown
- ignore errors if profiling inactive in Shutdown and deferred Stop
- move validations inside V1 functions
- return error if StartMemoryProfiling already started
- return error if StopMemoryProfiling already stopped or never started
- close profiling loop on error, not the cgr-engine
- StopMemoryProfiling closes channel and profiling loop writes final profile
- rename Path to DirPath for mandatory field error
- rename memprof_nrfiles flag to memprof_maxfiles
- increase default memprof_interval
- consider MaxFiles <= 0 as unlimited
- move memory profiling logic after starting services
- use CoreService Start/StopMemoryProfiling in main
- remove final memory profile block (created by deferred Stop)
- convert MemProfiling to method on CoreService and rename to profileMemory
- use Ticker for recurrent actions instead of Timer
- compute mem_final.prof full path in StartMemoryProfiling
- suffix profile files with current time instead of numbers
- update dispatcher methods after changes
- move MemoryPrf from utils to cores, rename to MemoryProfilingParams
- add logs for starting/stopping profiling
- added the possibility to disable timestamps in the memory profile file names
  and use increments of 1 instead.

Other changes:

- improved integration tests for flags (now table tests)
- improved profiling integration tests
2024-11-01 15:59:39 +01:00

143 lines
3.6 KiB
Go

/*
Real-time Online/Offline Charging System (OCS) for Telecom & ISP environments
Copyright (C) ITsysCOM GmbH
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>
*/
package services
import (
"fmt"
"os"
"sync"
"github.com/cgrates/birpc"
"github.com/cgrates/birpc/context"
"github.com/cgrates/cgrates/config"
"github.com/cgrates/cgrates/cores"
"github.com/cgrates/cgrates/engine"
"github.com/cgrates/cgrates/utils"
)
// NewCoreService builds a CoreService wired to the supplied dependencies.
//
// The returned service is not started yet. Its csCh is created with room for
// exactly one *cores.CoreS; every other field is taken verbatim from the
// corresponding argument.
func NewCoreService(cfg *config.CGRConfig, caps *engine.Caps, server *cores.Server,
	internalCoreSChan chan birpc.ClientConnector, anz *AnalyzerService,
	fileCPU *os.File, shdWg *sync.WaitGroup,
	srvDep map[string]*sync.WaitGroup) *CoreService {
	svc := new(CoreService)

	// Configuration and shared engine components.
	svc.cfg = cfg
	svc.caps = caps
	svc.server = server
	svc.anz = anz

	// Lifecycle / dependency bookkeeping.
	svc.shdWg = shdWg
	svc.srvDep = srvDep
	svc.fileCPU = fileCPU

	// Channels used to publish the service once started.
	svc.connChan = internalCoreSChan
	svc.csCh = make(chan *cores.CoreS, 1)

	return svc
}
// CoreService implements Service interface
// It wraps a *cores.CoreS and manages its start/shutdown lifecycle.
type CoreService struct {
// mu guards cS: Start and Shutdown write it under Lock, IsRunning reads it
// under RLock.
mu sync.RWMutex
// cfg is handed to cores.NewCoreService and consulted for
// DispatcherSCfg().Enabled when registering RPC services.
cfg *config.CGRConfig
// server is where the RPC services are registered in Start and
// unregistered in Shutdown.
server *cores.Server
// caps is forwarded to cores.NewCoreService.
caps *engine.Caps
// stopChan is created in Start, passed to cores.NewCoreService and closed
// in Shutdown.
stopChan chan struct{}
// shdWg is forwarded to cores.NewCoreService.
shdWg *sync.WaitGroup
// fileCPU is forwarded to cores.NewCoreService (presumably the CPU
// profile output file — confirm against cores).
fileCPU *os.File
// cS is the running core service; nil while the service is not running.
cS *cores.CoreS
// connChan receives the internal codec in Start and is drained in Shutdown.
connChan chan birpc.ClientConnector
// anz builds the internal codec via GetInternalCodec.
anz *AnalyzerService
// srvDep is stored by the constructor; it is not used in the visible code.
srvDep map[string]*sync.WaitGroup
// csCh holds the current *cores.CoreS (buffer of 1) for WaitForCoreS.
csCh chan *cores.CoreS
}
// Start creates the underlying cores.CoreS and publishes it.
//
// It allocates a fresh stop channel, builds the CoreS, makes it available to
// WaitForCoreS callers through csCh, registers the generated RPC services on
// the server (skipped when DispatcherS is enabled in the config) and finally
// sends the internal connection on connChan.
//
// The context argument is ignored; shtDw is forwarded to cores.NewCoreService.
// It returns utils.ErrServiceAlreadyRunning when the service is already
// started, or the error reported by engine.NewService.
func (cS *CoreService) Start(_ *context.Context, shtDw context.CancelFunc) error {
	cS.mu.Lock()
	defer cS.mu.Unlock()

	// The running check must happen under the write lock. Checking via
	// IsRunning() before locking leaves a window in which two concurrent
	// Start calls both pass; the second one would then overwrite cS.cS and
	// block forever on the csCh send (buffer of 1) while holding the lock.
	if cS.cS != nil {
		return utils.ErrServiceAlreadyRunning
	}

	utils.Logger.Info(fmt.Sprintf("<%s> starting <%s> subsystem", utils.CoreS, utils.CoreS))
	cS.stopChan = make(chan struct{})
	cS.cS = cores.NewCoreService(cS.cfg, cS.caps, cS.fileCPU, cS.stopChan, cS.shdWg, shtDw)
	cS.csCh <- cS.cS

	// NOTE(review): if NewService fails, cS.cS and csCh stay populated, so the
	// service still reports itself as running — confirm whether a rollback is
	// wanted here.
	srv, err := engine.NewService(cS.cS)
	if err != nil {
		return err
	}
	if !cS.cfg.DispatcherSCfg().Enabled {
		for _, s := range srv {
			cS.server.RpcRegister(s)
		}
	}
	cS.connChan <- cS.anz.GetInternalCodec(srv, utils.CoreS)
	return nil
}
// Reload is called when the configuration changes. CoreService has nothing
// to reload, so both arguments are ignored and nil is always returned.
func (cS *CoreService) Reload(_ *context.Context, _ context.CancelFunc) error {
	return nil
}
// Shutdown stops the service and undoes what Start published.
//
// It shuts down the underlying CoreS, closes the stop channel, stops CPU and
// memory profiling, clears the CoreS reference, drains connChan and csCh, and
// unregisters the CoreSv1 RPC object from the server.
//
// Calling Shutdown while the service is not running is a no-op.
// It always returns nil.
func (cS *CoreService) Shutdown() error {
	cS.mu.Lock()
	defer cS.mu.Unlock()

	// Guard against Shutdown before Start or a repeated Shutdown: without it
	// close(cS.stopChan) would panic on a nil or already closed channel.
	if cS.cS == nil {
		return nil
	}

	cS.cS.Shutdown()
	close(cS.stopChan)

	// Profiling may not be active at this point, so stopping it is
	// best-effort and the outcome is deliberately not inspected here.
	cS.cS.StopCPUProfiling()
	cS.cS.StopMemoryProfiling()

	cS.cS = nil
	<-cS.connChan // drop the internal connection sent by Start
	<-cS.csCh     // drop the CoreS kept for WaitForCoreS
	cS.server.RpcUnregisterName(utils.CoreSv1)
	return nil
}
// IsRunning reports whether the service currently holds a CoreS instance.
// The field is read under the read lock.
func (cS *CoreService) IsRunning() bool {
	cS.mu.RLock()
	running := cS.cS != nil
	cS.mu.RUnlock()
	return running
}
// ServiceName returns the identifier of this service, utils.CoreS.
func (cS *CoreService) ServiceName() string {
	name := utils.CoreS
	return name
}
// ShouldRun reports whether the service should be running.
// For CoreService the answer is unconditionally true.
func (cS *CoreService) ShouldRun() bool {
	const always = true
	return always
}
// WaitForCoreS blocks until a CoreS is available on csCh or ctx is done.
//
// On success the received CoreS is put back on the channel before being
// returned, so the channel keeps holding it afterwards. If ctx finishes
// first, a nil CoreS and ctx.Err() are returned.
func (cS *CoreService) WaitForCoreS(ctx *context.Context) (cs *cores.CoreS, err error) {
	// Grab the channel under the read lock, then wait without holding it.
	cS.mu.RLock()
	ch := cS.csCh
	cS.mu.RUnlock()

	select {
	case core := <-ch:
		ch <- core // hand it back for the next reader
		return core, nil
	case <-ctx.Done():
		return nil, ctx.Err()
	}
}