Skip to content

Commit c4585b2

Browse files
committed
Defer monitor update completions after funding spend
When no_further_updates_allowed() is true and the persister returns Completed, ChainMonitor now overrides the return to InProgress and queues the update_id in deferred_monitor_update_completions. These are resolved in release_pending_monitor_events, so ChannelManager sees them complete together with the force-close MonitorEvents. This eliminates phantom InProgress entries that would never complete: previously, a rejected pre-close update (e.g. commitment_signed arriving after funding spend) returned InProgress with no completion path, blocking MonitorUpdateCompletionActions (PaymentClaimed, PaymentForwarded) indefinitely. A subsequent post-close update returning Completed would then violate the in-order completion invariant. AI tools were used in preparing this commit.
1 parent 7132bd4 commit c4585b2

File tree

3 files changed

+197

-46

lines changed

lightning/src/chain/chainmonitor.rs

Lines changed: 91 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,11 @@ struct MonitorHolder<ChannelSigner: EcdsaChannelSigner> {
238238
/// [`ChannelMonitorUpdate`] which was already applied. While this isn't an issue for the
239239
/// LDK-provided update-based [`Persist`], it is somewhat surprising for users so we avoid it.
240240
pending_monitor_updates: Mutex<Vec<u64>>,
241+
/// Monitor update IDs for which the persister returned `Completed` but we overrode the return
242+
/// to `InProgress` because the channel is post-close (`no_further_updates_allowed()` is true).
243+
/// Resolved during the next monitor event release so that `ChannelManager` sees them complete
244+
/// together with the force-close `MonitorEvent`s.
245+
deferred_monitor_update_completions: Mutex<Vec<u64>>,
241246
}
242247

243248
impl<ChannelSigner: EcdsaChannelSigner> MonitorHolder<ChannelSigner> {
@@ -1100,7 +1105,11 @@ where
11001105
if let Some(ref chain_source) = self.chain_source {
11011106
monitor.load_outputs_to_watch(chain_source, &self.logger);
11021107
}
1103-
entry.insert(MonitorHolder { monitor, pending_monitor_updates: Mutex::new(Vec::new()) });
1108+
entry.insert(MonitorHolder {
1109+
monitor,
1110+
pending_monitor_updates: Mutex::new(Vec::new()),
1111+
deferred_monitor_update_completions: Mutex::new(Vec::new()),
1112+
});
11041113

11051114
Ok(ChannelMonitorUpdateStatus::Completed)
11061115
}
@@ -1141,6 +1150,7 @@ where
11411150
entry.insert(MonitorHolder {
11421151
monitor,
11431152
pending_monitor_updates: Mutex::new(pending_monitor_updates),
1153+
deferred_monitor_update_completions: Mutex::new(Vec::new()),
11441154
});
11451155
Ok(persist_res)
11461156
}
@@ -1171,6 +1181,12 @@ where
11711181
let logger = WithChannelMonitor::from(&self.logger, &monitor, None);
11721182
log_trace!(logger, "Updating ChannelMonitor to id {}", update.update_id,);
11731183

1184+
// We hold `deferred_monitor_update_completions` through the entire
1185+
// update process so that `release_pending_monitor_events` either
1186+
// sees both the `MonitorEvent`s generated by `update_monitor` and
1187+
// the corresponding deferral entry, or neither.
1188+
let mut deferred =
1189+
monitor_state.deferred_monitor_update_completions.lock().unwrap();
11741190
// We hold a `pending_monitor_updates` lock through `update_monitor` to ensure we
11751191
// have well-ordered updates from the users' point of view. See the
11761192
// `pending_monitor_updates` docs for more.
@@ -1222,6 +1238,7 @@ where
12221238
ChannelMonitorUpdateStatus::UnrecoverableError => {
12231239
// Take the monitors lock for writing so that we poison it and any future
12241240
// operations going forward fail immediately.
1241+
core::mem::drop(deferred);
12251242
core::mem::drop(pending_monitor_updates);
12261243
core::mem::drop(monitors);
12271244
let _poison = self.monitors.write().unwrap();
@@ -1250,7 +1267,31 @@ where
12501267
}
12511268
}
12521269

1253-
if update_res.is_err() {
1270+
debug_assert!(
1271+
update_res.is_ok() || monitor.no_further_updates_allowed(),
1272+
"update_monitor returned Err but channel is not post-close",
1273+
);
1274+
1275+
// We also check update_res.is_err() as a defensive measure: an
1276+
// error should only occur on a post-close monitor (validated by
1277+
// the debug_assert above), but we defer here regardless to avoid
1278+
// returning Completed for a failed update.
1279+
if (update_res.is_err() || monitor.no_further_updates_allowed())
1280+
&& persist_res == ChannelMonitorUpdateStatus::Completed
1281+
{
1282+
// The channel is post-close (funding spend seen, lockdown, or holder
1283+
// tx signed). Return InProgress so ChannelManager freezes the
1284+
// channel until the force-close MonitorEvents are processed. We
1285+
// track this update_id in deferred_monitor_update_completions so it
1286+
// gets resolved during release_pending_monitor_events, together with
1287+
// those MonitorEvents.
1288+
pending_monitor_updates.push(update_id);
1289+
deferred.push(update_id);
1290+
log_debug!(
1291+
logger,
1292+
"Deferring completion of ChannelMonitorUpdate id {:?} (channel is post-close)",
1293+
update_id,
1294+
);
12541295
ChannelMonitorUpdateStatus::InProgress
12551296
} else {
12561297
persist_res
@@ -1608,8 +1649,13 @@ where
16081649
for (channel_id, update_id) in self.persister.get_and_clear_completed_updates() {
16091650
let _ = self.channel_monitor_updated(channel_id, update_id);
16101651
}
1652+
let monitors = self.monitors.read().unwrap();
16111653
let mut pending_monitor_events = self.pending_monitor_events.lock().unwrap().split_off(0);
1612-
for monitor_state in self.monitors.read().unwrap().values() {
1654+
for monitor_state in monitors.values() {
1655+
// Hold the deferred lock across both `get_and_clear_pending_monitor_events`
1656+
// and the deferred resolution so that `update_monitor` cannot insert a
1657+
// `MonitorEvent` and deferral entry between the two steps.
1658+
let mut deferred = monitor_state.deferred_monitor_update_completions.lock().unwrap();
16131659
let monitor_events = monitor_state.monitor.get_and_clear_pending_monitor_events();
16141660
if monitor_events.len() > 0 {
16151661
let monitor_funding_txo = monitor_state.monitor.get_funding_txo();
@@ -1622,6 +1668,48 @@ where
16221668
counterparty_node_id,
16231669
));
16241670
}
1671+
// Resolve any deferred monitor update completions after collecting regular monitor
1672+
// events. The regular events include the force-close (CommitmentTxConfirmed), which
1673+
// ChannelManager processes first. The deferred completions come after, so that
1674+
// completion actions resolve once the ChannelForceClosed update (generated during
1675+
// force-close processing) also gets deferred and resolved in the next event cycle.
1676+
let mut pending = monitor_state.pending_monitor_updates.lock().unwrap();
1677+
for update_id in deferred.drain(..) {
1678+
let Some(pos) = pending.iter().position(|id| *id == update_id) else {
1679+
// Already resolved by channel_monitor_updated; skip to avoid
1680+
// duplicate MonitorEvent::Completed.
1681+
let channel_id = monitor_state.monitor.channel_id();
1682+
debug_assert!(
1683+
false,
1684+
"Deferred update {} already resolved for channel {}",
1685+
update_id, channel_id
1686+
);
1687+
let logger = WithContext::from(
1688+
&self.logger,
1689+
Some(monitor_state.monitor.get_counterparty_node_id()),
1690+
Some(channel_id),
1691+
None,
1692+
);
1693+
log_error!(logger, "Deferred update {} already resolved", update_id);
1694+
continue;
1695+
};
1696+
pending.remove(pos);
1697+
let monitor_is_pending_updates = monitor_state.has_pending_updates(&pending);
1698+
if !monitor_is_pending_updates {
1699+
let funding_txo = monitor_state.monitor.get_funding_txo();
1700+
let channel_id = monitor_state.monitor.channel_id();
1701+
pending_monitor_events.push((
1702+
funding_txo,
1703+
channel_id,
1704+
vec![MonitorEvent::Completed {
1705+
funding_txo,
1706+
channel_id,
1707+
monitor_update_id: monitor_state.monitor.get_latest_update_id(),
1708+
}],
1709+
monitor_state.monitor.get_counterparty_node_id(),
1710+
));
1711+
}
1712+
}
16251713
}
16261714
pending_monitor_events
16271715
}

lightning/src/ln/chanmon_update_fail_tests.rs

Lines changed: 102 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -3904,11 +3904,28 @@ fn do_test_durable_preimages_on_closed_channel(
39043904
}
39053905
if !close_chans_before_reload {
39063906
check_closed_broadcast(&nodes[1], 1, false);
3907-
let reason = ClosureReason::CommitmentTxConfirmed;
3908-
check_closed_event(&nodes[1], 1, reason, &[node_a_id], 100000);
3907+
// When hold=false, get_and_clear_pending_events also triggers
3908+
// process_background_events (replaying the preimage and force-close updates)
3909+
// and resolves the deferred completions, firing PaymentForwarded alongside
3910+
// ChannelClosed. When hold=true, only ChannelClosed fires.
3911+
let evs = nodes[1].node.get_and_clear_pending_events();
3912+
let expected = if hold_post_reload_mon_update { 1 } else { 2 };
3913+
assert_eq!(evs.len(), expected, "{:?}", evs);
3914+
assert!(evs.iter().any(|e| matches!(
3915+
e,
3916+
Event::ChannelClosed { reason: ClosureReason::CommitmentTxConfirmed, .. }
3917+
)));
3918+
if !hold_post_reload_mon_update {
3919+
assert!(evs.iter().any(|e| matches!(e, Event::PaymentForwarded { .. })));
3920+
check_added_monitors(&nodes[1], mons_added);
3921+
}
39093922
}
39103923
nodes[1].node.timer_tick_occurred();
3911-
check_added_monitors(&nodes[1], mons_added);
3924+
// For !close_chans_before_reload && !hold, background events were already replayed
3925+
// during get_and_clear_pending_events above, so timer_tick adds no monitors.
3926+
let expected_mons =
3927+
if !close_chans_before_reload && !hold_post_reload_mon_update { 0 } else { mons_added };
3928+
check_added_monitors(&nodes[1], expected_mons);
39123929

39133930
// Finally, check that B created a payment preimage transaction and close out the payment.
39143931
let bs_txn = nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0);
@@ -3923,44 +3940,61 @@ fn do_test_durable_preimages_on_closed_channel(
39233940
check_closed_broadcast(&nodes[0], 1, false);
39243941
expect_payment_sent(&nodes[0], payment_preimage, None, true, true);
39253942

3943+
if close_chans_before_reload && !hold_post_reload_mon_update {
3944+
// For close_chans_before_reload with hold=false, the deferred completions
3945+
// haven't been processed yet. Trigger process_pending_monitor_events now.
3946+
let _ = nodes[1].node.get_and_clear_pending_msg_events();
3947+
check_added_monitors(&nodes[1], 0);
3948+
}
3949+
39263950
if !close_chans_before_reload || close_only_a {
39273951
// Make sure the B<->C channel is still alive and well by sending a payment over it.
39283952
let mut reconnect_args = ReconnectArgs::new(&nodes[1], &nodes[2]);
39293953
reconnect_args.pending_responding_commitment_signed.1 = true;
3930-
// The B<->C `ChannelMonitorUpdate` shouldn't be allowed to complete, which is the
3931-
// equivalent to the responding `commitment_signed` being a duplicate for node B, thus we
3932-
// need to set the `pending_responding_commitment_signed_dup` flag.
3933-
reconnect_args.pending_responding_commitment_signed_dup_monitor.1 = true;
3954+
if hold_post_reload_mon_update {
3955+
// When the A-B update is still InProgress, B-C monitor updates are blocked,
3956+
// so the responding commitment_signed is a duplicate that generates no update.
3957+
reconnect_args.pending_responding_commitment_signed_dup_monitor.1 = true;
3958+
}
39343959
reconnect_args.pending_raa.1 = true;
39353960

39363961
reconnect_nodes(reconnect_args);
39373962
}
39383963

3939-
// Once the blocked `ChannelMonitorUpdate` *finally* completes, the pending
3940-
// `PaymentForwarded` event will finally be released.
3941-
let (_, ab_update_id) = nodes[1].chain_monitor.get_latest_mon_update_id(chan_id_ab);
3942-
nodes[1].chain_monitor.chain_monitor.force_channel_monitor_updated(chan_id_ab, ab_update_id);
3964+
if hold_post_reload_mon_update {
3965+
// When the persister returned InProgress, we need to manually complete the
3966+
// A-B monitor update to unblock the PaymentForwarded completion action.
3967+
let (_, ab_update_id) = nodes[1].chain_monitor.get_latest_mon_update_id(chan_id_ab);
3968+
nodes[1]
3969+
.chain_monitor
3970+
.chain_monitor
3971+
.force_channel_monitor_updated(chan_id_ab, ab_update_id);
3972+
}
39433973

39443974
// If the A<->B channel was closed before we reload, we'll replay the claim against it on
39453975
// reload, causing the `PaymentForwarded` event to get replayed.
39463976
let evs = nodes[1].node.get_and_clear_pending_events();
3947-
assert_eq!(evs.len(), if close_chans_before_reload { 2 } else { 1 });
3948-
for ev in evs {
3949-
if let Event::PaymentForwarded { claim_from_onchain_tx, next_htlcs, .. } = ev {
3950-
if !claim_from_onchain_tx {
3951-
// If the outbound channel is still open, the `next_user_channel_id` should be available.
3952-
// This was previously broken.
3953-
assert!(next_htlcs[0].user_channel_id.is_some())
3977+
if !close_chans_before_reload && !hold_post_reload_mon_update {
3978+
// PaymentForwarded already fired during get_and_clear_pending_events above.
3979+
assert!(evs.is_empty(), "{:?}", evs);
3980+
} else {
3981+
assert_eq!(evs.len(), if close_chans_before_reload { 2 } else { 1 }, "{:?}", evs);
3982+
for ev in evs {
3983+
if let Event::PaymentForwarded { claim_from_onchain_tx, next_htlcs, .. } = ev {
3984+
if !claim_from_onchain_tx {
3985+
assert!(next_htlcs[0].user_channel_id.is_some())
3986+
}
3987+
} else {
3988+
panic!("Unexpected event: {:?}", ev);
39543989
}
3955-
} else {
3956-
panic!();
39573990
}
39583991
}
39593992

39603993
if !close_chans_before_reload || close_only_a {
3961-
// Once we call `process_pending_events` the final `ChannelMonitor` for the B<->C channel
3962-
// will fly, removing the payment preimage from it.
3963-
check_added_monitors(&nodes[1], 1);
3994+
if hold_post_reload_mon_update {
3995+
// The B-C monitor update from the completion action fires now.
3996+
check_added_monitors(&nodes[1], 1);
3997+
}
39643998
assert!(nodes[1].node.get_and_clear_pending_events().is_empty());
39653999
send_payment(&nodes[1], &[&nodes[2]], 100_000);
39664000
}
@@ -5179,17 +5213,16 @@ fn test_mpp_claim_to_holding_cell() {
51795213
}
51805214

51815215
#[test]
5182-
#[should_panic(
5183-
expected = "Watch::update_channel returned Completed while prior updates are still InProgress"
5184-
)]
5185-
fn test_monitor_update_fail_after_funding_spend() {
5186-
// When a counterparty commitment transaction confirms (funding spend), the
5187-
// ChannelMonitor sets funding_spend_seen. If a commitment_signed from the
5188-
// counterparty is then processed (a race between chain events and message
5189-
// processing), update_monitor returns Err because no_further_updates_allowed()
5190-
// is true. ChainMonitor overrides the result to InProgress, permanently
5191-
// freezing the channel. A subsequent preimage claim returning Completed then
5192-
// triggers the per-channel assertion.
5216+
fn test_monitor_update_after_funding_spend() {
5217+
// Test that monitor updates still work after a funding spend is detected by the
5218+
// ChainMonitor but before ChannelManager has processed the corresponding block.
5219+
//
5220+
// When the counterparty commitment transaction confirms (funding spend), the
5221+
// ChannelMonitor sets funding_spend_seen and no_further_updates_allowed() returns
5222+
// true. ChainMonitor overrides all subsequent update_channel results to InProgress
5223+
// to freeze the channel. These overridden updates complete via deferred completions
5224+
// in release_pending_monitor_events, so that MonitorUpdateCompletionActions (like
5225+
// PaymentClaimed) can still fire.
51935226
let chanmon_cfgs = create_chanmon_cfgs(2);
51945227
let node_cfgs = create_node_cfgs(2, &chanmon_cfgs);
51955228
let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]);
@@ -5200,7 +5233,7 @@ fn test_monitor_update_fail_after_funding_spend() {
52005233
let (_, _, chan_id, _) = create_announced_chan_between_nodes(&nodes, 0, 1);
52015234

52025235
// Route payment 1 fully so B can claim it later.
5203-
let (payment_preimage_1, _payment_hash_1, ..) =
5236+
let (payment_preimage_1, payment_hash_1, ..) =
52045237
route_payment(&nodes[0], &[&nodes[1]], 1_000_000);
52055238

52065239
// Get A's commitment tx (this is the "counterparty" commitment from B's perspective).
@@ -5209,10 +5242,14 @@ fn test_monitor_update_fail_after_funding_spend() {
52095242

52105243
// Confirm A's commitment tx on B's chain_monitor ONLY (not on B's ChannelManager).
52115244
// This sets funding_spend_seen in the monitor, making no_further_updates_allowed() true.
5245+
// We also update the best block on the chain_monitor so the broadcaster height is
5246+
// consistent when claiming HTLCs.
52125247
let (block_hash, height) = nodes[1].best_block_info();
52135248
let block = create_dummy_block(block_hash, height + 1, vec![as_commitment_tx[0].clone()]);
52145249
let txdata: Vec<_> = block.txdata.iter().enumerate().collect();
52155250
nodes[1].chain_monitor.chain_monitor.transactions_confirmed(&block.header, &txdata, height + 1);
5251+
nodes[1].chain_monitor.chain_monitor.best_block_updated(&block.header, height + 1);
5252+
nodes[1].blocks.lock().unwrap().push((block, height + 1));
52165253

52175254
// Send payment 2 from A to B.
52185255
let (route, payment_hash_2, _, payment_secret_2) =
@@ -5234,15 +5271,38 @@ fn test_monitor_update_fail_after_funding_spend() {
52345271

52355272
nodes[1].node.handle_update_add_htlc(node_a_id, &payment_event.msgs[0]);
52365273

5237-
// B processes commitment_signed. The monitor's update_monitor succeeds on the
5238-
// update steps, but returns Err at the end because no_further_updates_allowed()
5239-
// is true (funding_spend_seen). ChainMonitor overrides the result to InProgress.
5274+
// B processes commitment_signed. The monitor applies the update but returns Err
5275+
// because no_further_updates_allowed() is true. ChainMonitor overrides to InProgress,
5276+
// freezing the channel.
52405277
nodes[1].node.handle_commitment_signed(node_a_id, &payment_event.commitment_msg[0]);
52415278
check_added_monitors(&nodes[1], 1);
5242-
assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty());
52435279

5244-
// B claims payment 1. The PaymentPreimage monitor update returns Completed
5245-
// (update_monitor succeeds for preimage, and persister returns Completed),
5246-
// but the prior InProgress from the commitment_signed is still pending.
5280+
// B claims payment 1. The preimage monitor update also returns InProgress (deferred),
5281+
// so no Completed-while-InProgress assertion fires.
52475282
nodes[1].node.claim_funds(payment_preimage_1);
5283+
check_added_monitors(&nodes[1], 1);
5284+
5285+
// First event cycle: the force-close MonitorEvent (CommitmentTxConfirmed) fires first,
5286+
// then the deferred completions resolve. The force-close generates a ChannelForceClosed
5287+
// update (also deferred), which blocks completion actions. So we only get ChannelClosed.
5288+
let events = nodes[1].node.get_and_clear_pending_events();
5289+
assert_eq!(events.len(), 1);
5290+
match &events[0] {
5291+
Event::ChannelClosed { reason: ClosureReason::CommitmentTxConfirmed, .. } => {},
5292+
_ => panic!("Unexpected event: {:?}", events[0]),
5293+
}
5294+
check_added_monitors(&nodes[1], 1);
5295+
nodes[1].node.get_and_clear_pending_msg_events();
5296+
5297+
// Second event cycle: the ChannelForceClosed deferred completion resolves, unblocking
5298+
// the PaymentClaimed completion action.
5299+
let events = nodes[1].node.get_and_clear_pending_events();
5300+
assert_eq!(events.len(), 1);
5301+
match &events[0] {
5302+
Event::PaymentClaimed { payment_hash, amount_msat, .. } => {
5303+
assert_eq!(payment_hash_1, *payment_hash);
5304+
assert_eq!(1_000_000, *amount_msat);
5305+
},
5306+
_ => panic!("Unexpected event: {:?}", events[0]),
5307+
}
52485308
}

lightning/src/ln/channelmanager.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10387,7 +10387,10 @@ This indicates a bug inside LDK. Please report this error at https://github.com/
1038710387
#[cfg(not(test))]
1038810388
let skip_check = false;
1038910389
if !skip_check && update_completed && !in_flight_updates.is_empty() {
10390-
panic!("Watch::update_channel returned Completed while prior updates are still InProgress");
10390+
panic!(
10391+
"Watch::update_channel returned Completed for channel {} while {} prior updates are still InProgress",
10392+
channel_id, in_flight_updates.len(),
10393+
);
1039110394
}
1039210395
(update_completed, update_completed && in_flight_updates.is_empty())
1039310396
} else {

0 commit comments

Comments (0)