optimize: improve breaker algorithm on recovery time (#4077)

This commit is contained in:
Kevin Wan
2024-04-18 22:33:25 +08:00
committed by GitHub
parent 95b32b5779
commit 1540bdc4c9
10 changed files with 311 additions and 121 deletions

View File

@@ -9,23 +9,38 @@ import (
const (
// 250ms for bucket duration
window = time.Second * 10
buckets = 40
k = 1.5
protection = 5
window = time.Second * 10
buckets = 40
maxFailBucketsToDecreaseK = 30
minBucketsToSpeedUp = 3
k = 1.5
minK = 1.1
recoveryK = 3 - k
protection = 5
)
// googleBreaker is a netflixBreaker pattern from google.
// see Client-Side Throttling section in https://landing.google.com/sre/sre-book/chapters/handling-overload/
type googleBreaker struct {
k float64
stat *collection.RollingWindow
proba *mathx.Proba
}
type (
googleBreaker struct {
k float64
stat *collection.RollingWindow[int64, *bucket]
proba *mathx.Proba
}
windowResult struct {
accepts int64
total int64
failingBuckets int64
workingBuckets int64
}
)
func newGoogleBreaker() *googleBreaker {
bucketDuration := time.Duration(int64(window) / int64(buckets))
st := collection.NewRollingWindow(buckets, bucketDuration)
st := collection.NewRollingWindow[int64, *bucket](func() *bucket {
return new(bucket)
}, buckets, bucketDuration)
return &googleBreaker{
stat: st,
k: k,
@@ -34,15 +49,28 @@ func newGoogleBreaker() *googleBreaker {
}
func (b *googleBreaker) accept() error {
accepts, total := b.history()
weightedAccepts := b.k * float64(accepts)
var w float64
history := b.history()
if history.failingBuckets >= minBucketsToSpeedUp {
w = b.k - float64(history.failingBuckets-1)*(b.k-minK)/maxFailBucketsToDecreaseK
w = mathx.AtLeast(w, minK)
} else {
w = b.k
}
weightedAccepts := w * float64(history.accepts)
// https://landing.google.com/sre/sre-book/chapters/handling-overload/#eq2101
// for better performance, no need to care about negative ratio
dropRatio := (float64(total-protection) - weightedAccepts) / float64(total+1)
// for better performance, no need to care about the negative ratio
dropRatio := (float64(history.total-protection) - weightedAccepts) / float64(history.total+1)
if dropRatio <= 0 {
return nil
}
// If we have more than 2 working buckets, we are in recovery mode,
// the latest bucket is the current one, so we ignore it.
if history.workingBuckets >= minBucketsToSpeedUp {
dropRatio /= recoveryK
}
if b.proba.TrueOnProba(dropRatio) {
return ErrServiceUnavailable
}
@@ -52,7 +80,7 @@ func (b *googleBreaker) accept() error {
func (b *googleBreaker) allow() (internalPromise, error) {
if err := b.accept(); err != nil {
b.markFailure()
b.markDrop()
return nil, err
}
@@ -63,7 +91,7 @@ func (b *googleBreaker) allow() (internalPromise, error) {
func (b *googleBreaker) doReq(req func() error, fallback Fallback, acceptable Acceptable) error {
if err := b.accept(); err != nil {
b.markFailure()
b.markDrop()
if fallback != nil {
return fallback(err)
}
@@ -71,10 +99,10 @@ func (b *googleBreaker) doReq(req func() error, fallback Fallback, acceptable Ac
return err
}
var success bool
var succ bool
defer func() {
// if req() panic, success is false, mark as failure
if success {
if succ {
b.markSuccess()
} else {
b.markFailure()
@@ -83,27 +111,43 @@ func (b *googleBreaker) doReq(req func() error, fallback Fallback, acceptable Ac
err := req()
if acceptable(err) {
success = true
succ = true
}
return err
}
func (b *googleBreaker) markSuccess() {
b.stat.Add(1)
func (b *googleBreaker) markDrop() {
b.stat.Add(drop)
}
func (b *googleBreaker) markFailure() {
b.stat.Add(0)
b.stat.Add(fail)
}
func (b *googleBreaker) history() (accepts, total int64) {
b.stat.Reduce(func(b *collection.Bucket) {
accepts += int64(b.Sum)
total += b.Count
func (b *googleBreaker) markSuccess() {
b.stat.Add(success)
}
func (b *googleBreaker) history() windowResult {
var result windowResult
b.stat.Reduce(func(b *bucket) {
result.accepts += b.Success
result.total += b.Sum
if b.Failure > 0 {
result.workingBuckets = 0
} else if b.Success > 0 {
result.workingBuckets++
}
if b.Drop > 0 && b.Failure > 0 {
result.failingBuckets++
} else {
result.failingBuckets = 0
}
})
return
return result
}
type googlePromise struct {