Skip to content

Commit 969d25c

Browse files
Merge 050f786 into blathers/backport-release-26.1-159403
2 parents 4097cda + 050f786 commit 969d25c

File tree

3 files changed

+29
-1
lines changed

3 files changed

+29
-1
lines changed

pkg/util/admission/admission.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,15 @@ func (bq burstQualification) String() string {
155155
}
156156
}
157157

158+
// Mutex ordering between requester and granter:
159+
//
160+
// The requester and granter call into each other. To prevent deadlock due to
161+
// mutex cycles, any mutex in granter is ordered before any mutex in
162+
// requester. Therefore, a requester must not hold its own mutex when calling
163+
// into granter. Of course, a granter could choose to release its own mutex
164+
// before calling into requester, but it is not necessary for deadlock
165+
// prevention.
166+
158167
// requester is an interface implemented by an object that orders admission
159168
// work for a particular WorkKind. See WorkQueue for the implementation of
160169
// requester.

pkg/util/admission/io_load_listener.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -583,6 +583,8 @@ func (io *ioLoadListener) pebbleMetricsTick(ctx context.Context, metrics StoreMe
583583
io.cumFlushWriteThroughput = metrics.Flush.WriteThroughput
584584
// We assume that the system is loaded if there is less than unlimited tokens
585585
// available.
586+
//
587+
// TODO(sumeer): this condition should also incorporate disk byte tokens.
586588
return io.totalNumByteTokens < unlimitedTokens || io.totalNumElasticByteTokens < unlimitedTokens
587589
}
588590

pkg/util/admission/snapshot_queue.go

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,9 @@ type snapshotRequester interface {
8989
type SnapshotQueue struct {
9090
snapshotGranter granter
9191
mu struct {
92+
// Reminder: this mutex must not be held when calling into
93+
// snapshotGranter, as documented near the declaration of requester and
94+
// granter.
9295
syncutil.Mutex
9396
q *queue.Queue[*snapshotWorkItem]
9497
}
@@ -164,6 +167,13 @@ func (s *SnapshotQueue) Admit(ctx context.Context, count int64) error {
164167
return nil
165168
}
166169
// We were unable to get tokens for admission, so we queue.
170+
//
171+
// Reminder: there is a race here where a call to hasWaitingRequests after
172+
// tryGet and before the mutex is acquired and the item is added to the
173+
// queue will not see the waiting request. Such a race would result in an
174+
// end state where the granter has resources and the requester has waiting
175+
// requests. This is harmless since hasWaitingRequests is called
176+
// periodically at a high enough frequency when tokens are limited.
167177
shouldRelease := true
168178
item := newSnapshotWorkItem(count)
169179
defer func() {
@@ -182,11 +192,15 @@ func (s *SnapshotQueue) Admit(ctx context.Context, count int64) error {
182192
select {
183193
case <-ctx.Done():
184194
waitDur := timeutil.Since(item.enqueueingTime).Nanoseconds()
195+
// INVARIANT: tokensToReturn >= 0, since item.count >= 0.
196+
var tokensToReturn int64
185197
func() {
186198
s.mu.Lock()
187199
defer s.mu.Unlock()
188200
if !item.mu.inQueue {
189-
s.snapshotGranter.returnGrant(item.count)
201+
// NB: we must call snapshotGranter.returnGrant after releasing the
202+
// mutex.
203+
tokensToReturn = item.count
190204
}
191205
// TODO(aaditya): Ideally, we also remove the item from the actual queue.
192206
// Right now, if we cancel the work, it remains in the queue. A call to
@@ -196,6 +210,9 @@ func (s *SnapshotQueue) Admit(ctx context.Context, count int64) error {
196210
// non-ideal behavior, but still provides accurate token accounting.
197211
item.mu.cancelled = true
198212
}()
213+
if tokensToReturn != 0 {
214+
s.snapshotGranter.returnGrant(tokensToReturn)
215+
}
199216
shouldRelease = false
200217
s.metrics.WaitDurations.RecordValue(waitDur)
201218
var deadlineSubstring string

0 commit comments

Comments
 (0)