From d99cf35b07b03166f8675b95fd0ab6f818a12fca Mon Sep 17 00:00:00 2001 From: MarkJoyMa <64180138+MarkJoyMa@users.noreply.github.com> Date: Sat, 7 Jun 2025 21:12:31 +0800 Subject: [PATCH] Feat/continue profiling (#4867) Co-authored-by: aiden.ma Co-authored-by: aiden.ma --- core/service/serviceconf.go | 5 + go.mod | 2 + go.sum | 4 + internal/profiling/profiling.go | 267 +++++++++++++++++++++++++++ internal/profiling/profiling_test.go | 143 ++++++++++++++ 5 files changed, 421 insertions(+) create mode 100644 internal/profiling/profiling.go create mode 100644 internal/profiling/profiling_test.go diff --git a/core/service/serviceconf.go b/core/service/serviceconf.go index bc3ab967c..b0edcf599 100644 --- a/core/service/serviceconf.go +++ b/core/service/serviceconf.go @@ -8,6 +8,7 @@ import ( "github.com/zeromicro/go-zero/core/stat" "github.com/zeromicro/go-zero/core/trace" "github.com/zeromicro/go-zero/internal/devserver" + "github.com/zeromicro/go-zero/internal/profiling" ) const ( @@ -38,6 +39,8 @@ type ( Telemetry trace.Config `json:",optional"` DevServer DevServerConfig `json:",optional"` Shutdown proc.ShutdownConf `json:",optional"` + // Profiling is the configuration for profiling. + Profiling profiling.Config `json:",optional"` } ) @@ -72,6 +75,8 @@ func (sc ServiceConf) SetUp() error { } devserver.StartAgent(sc.DevServer) + profiling.Start(sc.Profiling) + return nil } diff --git a/go.mod b/go.mod index b04cbcdf0..a233c8f43 100644 --- a/go.mod +++ b/go.mod @@ -12,6 +12,7 @@ require ( github.com/golang/mock v1.6.0 github.com/golang/protobuf v1.5.4 github.com/google/uuid v1.6.0 + github.com/grafana/pyroscope-go v1.2.2 github.com/jackc/pgx/v5 v5.7.4 github.com/jhump/protoreflect v1.17.0 github.com/pelletier/go-toml/v2 v2.2.2 @@ -71,6 +72,7 @@ require ( github.com/google/gnostic-models v0.6.8 // indirect github.com/google/go-cmp v0.6.0 // indirect github.com/google/gofuzz v1.2.0 // indirect + github.com/grafana/pyroscope-go/godeltaprof v0.1.8 // indirect github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0 // indirect github.com/h2non/parth v0.0.0-20190131123155-b4df798d6542 // indirect github.com/jackc/pgpassfile v1.0.0 // indirect diff --git a/go.sum b/go.sum index 83ee4ba07..f4d4d3688 100644 --- a/go.sum +++ b/go.sum @@ -80,6 +80,10 @@ github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1 h1:K6RDEckDVWvDI9JAJY github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/grafana/pyroscope-go v1.2.2 h1:uvKCyZMD724RkaCEMrSTC38Yn7AnFe8S2wiAIYdDPCE= +github.com/grafana/pyroscope-go v1.2.2/go.mod h1:zzT9QXQAp2Iz2ZdS216UiV8y9uXJYQiGE1q8v1FyhqU= +github.com/grafana/pyroscope-go/godeltaprof v0.1.8 h1:iwOtYXeeVSAeYefJNaxDytgjKtUuKQbJqgAIjlnicKg= +github.com/grafana/pyroscope-go/godeltaprof v0.1.8/go.mod h1:2+l7K7twW49Ct4wFluZD3tZ6e0SjanjcUUBPVD/UuGU= github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0 h1:bkypFPDjIYGfCYD5mRBvpqxfYX1YCS1PXdKYWi8FsN0= github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0/go.mod h1:P+Lt/0by1T8bfcF3z737NnSbmxQAppXMRziHUxPOC8k= github.com/h2non/parth v0.0.0-20190131123155-b4df798d6542 h1:2VTzZjLZBgl62/EtslCrtky5vbi9dd7HrQPQIx6wqiw= diff --git a/internal/profiling/profiling.go b/internal/profiling/profiling.go new file mode 100644 index 000000000..7f7a795dd --- /dev/null +++ b/internal/profiling/profiling.go @@ -0,0 +1,267 @@ +package profiling + +import ( + "runtime" + "sync" + "time" + + "github.com/grafana/pyroscope-go" + + "github.com/zeromicro/go-zero/core/logx" + "github.com/zeromicro/go-zero/core/proc" + "github.com/zeromicro/go-zero/core/stat" + "github.com/zeromicro/go-zero/core/threading" +) + +type ( + Config struct { + // Name is the name of the application. + Name string `json:",optional,inherit"` + // ServerAddress is the address of the profiling server. + ServerAddress string + // AuthUser is the username for basic authentication. + AuthUser string `json:",optional"` + // AuthPassword is the password for basic authentication. + AuthPassword string `json:",optional"` + // UploadDuration is the duration for which profiling data is uploaded. + UploadDuration time.Duration `json:",default=15s"` + // IntervalDuration is the interval for which profiling data is collected. + IntervalDuration time.Duration `json:",default=10s"` + // ProfilingDuration is the duration for which profiling data is collected. + ProfilingDuration time.Duration `json:",default=2m"` + // CpuThreshold the collection is allowed only when the current service cpu < CpuThreshold + CpuThreshold int64 `json:",default=700,range=[0:1000)"` + + // ProfileType is the type of profiling to be performed. + ProfileType ProfileType + } + + ProfileType struct { + // Logger is a flag to enable or disable logging. + Logger bool `json:",default=false"` + // CPU is a flag to disable CPU profiling. + CPU bool `json:",default=true"` + // Goroutines is a flag to disable goroutine profiling. + Goroutines bool `json:",default=true"` + // Memory is a flag to disable memory profiling. + Memory bool `json:",default=true"` + // Mutex is a flag to disable mutex profiling. + Mutex bool `json:",default=false"` + // Block is a flag to disable block profiling. + Block bool `json:",default=false"` + } + + profiler interface { + Start() error + Stop() error + } + + pyProfiler struct { + c Config + profiler *pyroscope.Profiler + } +) + +var ( + once sync.Once + + newProfiler = func(c Config) profiler { + return newPyProfiler(c) + } +) + +// Start initializes the pyroscope profiler with the given configuration. +func Start(c Config) { + // check if the profiling is enabled + if c.ServerAddress == "" { + return + } + + // set default values for the configuration + if c.ProfilingDuration <= 0 { + c.ProfilingDuration = time.Minute * 2 + } + + // set default values for the configuration + if c.IntervalDuration <= 0 { + c.IntervalDuration = time.Second * 10 + } + + if c.UploadDuration <= 0 { + c.UploadDuration = time.Second * 15 + } + + once.Do(func() { + logx.Info("continuous profiling started") + + var done = make(chan struct{}) + proc.AddShutdownListener(func() { + done <- struct{}{} + close(done) + }) + + threading.GoSafe(func() { + startPyroScope(c, done) + }) + }) +} + +// startPyroScope starts the pyroscope profiler with the given configuration. +func startPyroScope(c Config, done <-chan struct{}) { + var ( + intervalTicker = time.NewTicker(c.IntervalDuration) + profilingTicker = time.NewTicker(c.ProfilingDuration) + + pr profiler + err error + + latestProfilingTime time.Time + ) + + for { + select { + case <-intervalTicker.C: + // Check if the machine is overloaded and if the profiler is not running + if pr == nil && checkMachinePerformance(c) { + pr = newProfiler(c) + if err := pr.Start(); err != nil { + logx.Errorf("failed to start profiler: %v", err) + continue + } + + // record the latest profiling time + latestProfilingTime = time.Now() + logx.Infof("pyroScope profiler started.") + } + case <-profilingTicker.C: + // check if the profiling duration has passed + if !time.Now().After(latestProfilingTime.Add(c.ProfilingDuration)) { + continue + } + + // check if the profiler is already running, if so, skip + if pr != nil { + if err = pr.Stop(); err != nil { + logx.Errorf("failed to stop profiler: %v", err) + } + logx.Infof("pyroScope profiler stopped.") + pr = nil + } + case <-done: + logx.Infof("continuous profiling stopped.") + intervalTicker.Stop() + profilingTicker.Stop() + return + } + } +} + +// genPyroScopeConf generates the pyroscope configuration based on the given config. +func genPyroScopeConf(c Config) pyroscope.Config { + pConf := pyroscope.Config{ + UploadRate: c.UploadDuration, + ApplicationName: c.Name, + BasicAuthUser: c.AuthUser, // http basic auth user + BasicAuthPassword: c.AuthPassword, // http basic auth password + ServerAddress: c.ServerAddress, + Logger: nil, + + HTTPHeaders: map[string]string{}, + + // you can provide static tags via a map: + Tags: map[string]string{ + "name": c.Name, + }, + } + + if c.ProfileType.Logger { + pConf.Logger = logx.WithCallerSkip(0) + } + + if c.ProfileType.CPU { + pConf.ProfileTypes = append(pConf.ProfileTypes, pyroscope.ProfileCPU) + } + if c.ProfileType.Goroutines { + pConf.ProfileTypes = append(pConf.ProfileTypes, pyroscope.ProfileGoroutines) + } + if c.ProfileType.Memory { + pConf.ProfileTypes = append(pConf.ProfileTypes, pyroscope.ProfileAllocObjects, pyroscope.ProfileAllocSpace, + pyroscope.ProfileInuseObjects, pyroscope.ProfileInuseSpace) + } + if c.ProfileType.Mutex { + pConf.ProfileTypes = append(pConf.ProfileTypes, pyroscope.ProfileMutexCount, pyroscope.ProfileMutexDuration) + } + if c.ProfileType.Block { + pConf.ProfileTypes = append(pConf.ProfileTypes, pyroscope.ProfileBlockCount, pyroscope.ProfileBlockDuration) + } + + logx.Infof("applicationName: %s", pConf.ApplicationName) + + return pConf +} + +// checkMachinePerformance checks the machine performance based on the given configuration. +func checkMachinePerformance(c Config) bool { + currentValue := stat.CpuUsage() + if currentValue >= c.CpuThreshold { + logx.Infof("continuous profiling cpu overload, cpu:%d", currentValue) + return true + } + + return false +} + +func newPyProfiler(c Config) profiler { + return &pyProfiler{ + c: c, + } +} + +func (p *pyProfiler) Start() error { + pConf := genPyroScopeConf(p.c) + // set mutex and block profile rate + setFraction(p.c) + profiler, err := pyroscope.Start(pConf) + if err != nil { + resetFraction(p.c) + return err + } + + p.profiler = profiler + return nil +} + +func (p *pyProfiler) Stop() error { + if p.profiler == nil { + return nil + } + + err := p.profiler.Stop() + if err != nil { + return err + } + resetFraction(p.c) + p.profiler = nil + + return nil +} + +func setFraction(c Config) { + // These 2 lines are only required if you're using mutex or block profiling + if c.ProfileType.Mutex { + runtime.SetMutexProfileFraction(10) // 10/seconds + } + if c.ProfileType.Block { + runtime.SetBlockProfileRate(1000 * 1000) // 1/millisecond + } +} + +func resetFraction(c Config) { + // These 2 lines are only required if you're using mutex or block profiling + if c.ProfileType.Mutex { + runtime.SetMutexProfileFraction(0) + } + if c.ProfileType.Block { + runtime.SetBlockProfileRate(0) + } +} diff --git a/internal/profiling/profiling_test.go b/internal/profiling/profiling_test.go new file mode 100644 index 000000000..b80639fc8 --- /dev/null +++ b/internal/profiling/profiling_test.go @@ -0,0 +1,143 @@ +package profiling + +import ( + "sync" + "testing" + "time" + + "github.com/grafana/pyroscope-go" + "github.com/stretchr/testify/assert" +) + +func TestStart(t *testing.T) { + t.Run("invalid config", func(t *testing.T) { + var mockProfiler = &mockProfiler{} + newProfiler = func(c Config) profiler { + return mockProfiler + } + + Start(Config{}) + + Start(Config{ + ServerAddress: "localhost:4040", + }) + }) + + t.Run("test start profiler", func(t *testing.T) { + var mockProfiler = &mockProfiler{} + newProfiler = func(c Config) profiler { + return mockProfiler + } + + c := Config{ + Name: "test", + ServerAddress: "localhost:4040", + IntervalDuration: time.Millisecond, + ProfilingDuration: time.Millisecond * 10, + CpuThreshold: 0, + } + var done = make(chan struct{}) + go startPyroScope(c, done) + + time.Sleep(time.Millisecond * 50) + done <- struct{}{} + + assert.True(t, mockProfiler.started) + assert.True(t, mockProfiler.stopped) + }) + + t.Run("start/stop err", func(t *testing.T) { + var mockProfiler = &mockProfiler{ + err: assert.AnError, + } + newProfiler = func(c Config) profiler { + return mockProfiler + } + + c := Config{ + Name: "test", + ServerAddress: "localhost:4040", + IntervalDuration: time.Millisecond, + ProfilingDuration: time.Millisecond * 10, + CpuThreshold: 0, + } + var done = make(chan struct{}) + go startPyroScope(c, done) + + time.Sleep(time.Millisecond * 50) + done <- struct{}{} + + assert.False(t, mockProfiler.started) + assert.False(t, mockProfiler.stopped) + }) +} + +func TestGenPyroScopeConf(t *testing.T) { + c := Config{ + Name: "", + ServerAddress: "localhost:4040", + AuthUser: "user", + AuthPassword: "password", + ProfileType: ProfileType{ + Logger: true, + CPU: true, + Goroutines: true, + Memory: true, + Mutex: true, + Block: true, + }, + } + + conf := genPyroScopeConf(c) + assert.Equal(t, c.ServerAddress, conf.ServerAddress) + assert.Equal(t, c.AuthUser, conf.BasicAuthUser) + assert.Equal(t, c.AuthPassword, conf.BasicAuthPassword) + assert.Equal(t, c.Name, conf.ApplicationName) + assert.Contains(t, conf.ProfileTypes, pyroscope.ProfileCPU) + assert.Contains(t, conf.ProfileTypes, pyroscope.ProfileGoroutines) + assert.Contains(t, conf.ProfileTypes, pyroscope.ProfileAllocObjects) + assert.Contains(t, conf.ProfileTypes, pyroscope.ProfileAllocSpace) + assert.Contains(t, conf.ProfileTypes, pyroscope.ProfileInuseObjects) + assert.Contains(t, conf.ProfileTypes, pyroscope.ProfileInuseSpace) + assert.Contains(t, conf.ProfileTypes, pyroscope.ProfileMutexCount) + assert.Contains(t, conf.ProfileTypes, pyroscope.ProfileMutexDuration) + assert.Contains(t, conf.ProfileTypes, pyroscope.ProfileBlockCount) + assert.Contains(t, conf.ProfileTypes, pyroscope.ProfileBlockDuration) + + setFraction(c) + resetFraction(c) + + newPyProfiler(c) +} + +func TestNewPyProfiler(t *testing.T) { + p := newPyProfiler(Config{}) + + assert.Error(t, p.Start()) + assert.NoError(t, p.Stop()) +} + +type mockProfiler struct { + mutex sync.Mutex + started bool + stopped bool + err error +} + +func (m *mockProfiler) Start() error { + m.mutex.Lock() + if m.err == nil { + m.started = true + } + m.mutex.Unlock() + return m.err +} + +func (m *mockProfiler) Stop() error { + m.mutex.Lock() + if m.err == nil { + m.stopped = true + } + m.mutex.Unlock() + return m.err +}