Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 42 additions & 3 deletions lightning/src/chain/chainmonitor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1250,7 +1250,41 @@ where
}
}

if update_res.is_err() {
debug_assert!(
update_res.is_ok() || monitor.no_further_updates_allowed(),
"update_monitor returned Err but channel is not post-close",
);

// We also check update_res.is_err() as a defensive measure: an
// error should only occur on a post-close monitor (validated by
// the debug_assert above), but we defer here regardless to avoid
// returning Completed for a failed update.
if (update_res.is_err() || monitor.no_further_updates_allowed())
&& persist_res == ChannelMonitorUpdateStatus::Completed
{
// The channel is post-close (funding spend seen, lockdown, or
// holder tx signed). Return InProgress so ChannelManager freezes
// the channel until the force-close MonitorEvents are processed.
// Push a Completed event into pending_monitor_events so it gets
// picked up after the per-monitor events in the next
// release_pending_monitor_events call.
let funding_txo = monitor.get_funding_txo();
let channel_id = monitor.channel_id();
self.pending_monitor_events.lock().unwrap().push((
funding_txo,
channel_id,
vec![MonitorEvent::Completed {
funding_txo,
channel_id,
monitor_update_id: monitor.get_latest_update_id(),
}],
monitor.get_counterparty_node_id(),
));
log_debug!(
logger,
"Deferring completion of ChannelMonitorUpdate id {:?} (channel is post-close)",
update_id,
);
ChannelMonitorUpdateStatus::InProgress
} else {
persist_res
Expand Down Expand Up @@ -1614,8 +1648,9 @@ where
for (channel_id, update_id) in self.persister.get_and_clear_completed_updates() {
let _ = self.channel_monitor_updated(channel_id, update_id);
}
let mut pending_monitor_events = self.pending_monitor_events.lock().unwrap().split_off(0);
for monitor_state in self.monitors.read().unwrap().values() {
let monitors = self.monitors.read().unwrap();
let mut pending_monitor_events = Vec::new();
for monitor_state in monitors.values() {
let monitor_events = monitor_state.monitor.get_and_clear_pending_monitor_events();
if monitor_events.len() > 0 {
let monitor_funding_txo = monitor_state.monitor.get_funding_txo();
Expand All @@ -1629,6 +1664,10 @@ where
));
}
}
// Drain pending_monitor_events (which includes deferred post-close
// completions) after per-monitor events so that force-close
// MonitorEvents are processed by ChannelManager first.
pending_monitor_events.extend(self.pending_monitor_events.lock().unwrap().split_off(0));
Comment on lines +1667 to +1670
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The reordering here affects all async persist completions, not just the new deferred post-close ones. Previously, pending_monitor_events (which includes Completed events from channel_monitor_updated / async persist callbacks) was drained before per-monitor events. Now it's drained after.

This means if an async persist completion and a force-close MonitorEvent (e.g., CommitmentTxConfirmed) are both pending in the same release_pending_monitor_events call, the force-close is now processed first by ChannelManager. Previously, the persist completion would have been processed first, potentially unfreezing the channel and allowing queued messages to flow before the force-close.

This is likely the intended behavior for the deferred-completion case, but it's a broader behavioral change that also affects the normal async-persist path. Worth documenting in the commit message or a code comment that this ordering change is intentional and applies to all channel_monitor_updated completions.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, this reordering is indeed the intended behavior.

pending_monitor_events
}
}
Expand Down
1 change: 1 addition & 0 deletions lightning/src/chain/channelmonitor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7048,6 +7048,7 @@ mod tests {
let legacy_cfg = test_legacy_channel_config();
let node_chanmgrs = create_node_chanmgrs(3, &node_cfgs, &[Some(legacy_cfg.clone()), Some(legacy_cfg.clone()), Some(legacy_cfg)]);
let nodes = create_network(3, &node_cfgs, &node_chanmgrs);
nodes[1].disable_monitor_completeness_assertion();
let channel = create_announced_chan_between_nodes(&nodes, 0, 1);
create_announced_chan_between_nodes(&nodes, 1, 2);

Expand Down
12 changes: 4 additions & 8 deletions lightning/src/chain/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -233,11 +233,10 @@ pub enum ChannelMonitorUpdateStatus {
/// This includes performing any `fsync()` calls required to ensure the update is guaranteed to
/// be available on restart even if the application crashes.
///
/// If you return this variant, you cannot later return [`InProgress`] from the same instance of
/// [`Persist`]/[`Watch`] without first restarting.
/// You cannot switch from [`InProgress`] to this variant for the same channel without first
/// restarting. However, switching from this variant to [`InProgress`] is always allowed.
///
/// [`InProgress`]: ChannelMonitorUpdateStatus::InProgress
/// [`Persist`]: chainmonitor::Persist
Completed,
/// Indicates that the update will happen asynchronously in the background or that a transient
/// failure occurred which is being retried in the background and will eventually complete.
Expand All @@ -263,12 +262,7 @@ pub enum ChannelMonitorUpdateStatus {
/// reliable, this feature is considered beta, and a handful of edge-cases remain. Until the
/// remaining cases are fixed, in rare cases, *using this feature may lead to funds loss*.
///
/// If you return this variant, you cannot later return [`Completed`] from the same instance of
/// [`Persist`]/[`Watch`] without first restarting.
///
/// [`InProgress`]: ChannelMonitorUpdateStatus::InProgress
/// [`Completed`]: ChannelMonitorUpdateStatus::Completed
/// [`Persist`]: chainmonitor::Persist
InProgress,
/// Indicates that an update has failed and will not complete at any point in the future.
///
Expand Down Expand Up @@ -328,6 +322,8 @@ pub trait Watch<ChannelSigner: EcdsaChannelSigner> {
/// cannot be retried, the node should shut down immediately after returning
/// [`ChannelMonitorUpdateStatus::UnrecoverableError`], see its documentation for more info.
///
/// See [`ChannelMonitorUpdateStatus`] for requirements on when each variant may be returned.
///
/// [`ChannelManager`]: crate::ln::channelmanager::ChannelManager
fn update_channel(
&self, channel_id: ChannelId, update: &ChannelMonitorUpdate,
Expand Down
183 changes: 159 additions & 24 deletions lightning/src/ln/chanmon_update_fail_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ use crate::chain::chaininterface::LowerBoundedFeeEstimator;
use crate::chain::chainmonitor::ChainMonitor;
use crate::chain::channelmonitor::{ChannelMonitor, MonitorEvent, ANTI_REORG_DELAY};
use crate::chain::transaction::OutPoint;
use crate::chain::{ChannelMonitorUpdateStatus, Listen, Watch};
use crate::chain::{ChannelMonitorUpdateStatus, Confirm, Listen, Watch};
use crate::events::{ClosureReason, Event, HTLCHandlingFailureType, PaymentPurpose};
use crate::ln::channel::AnnouncementSigsState;
use crate::ln::channelmanager::{PaymentId, RAACommitmentOrder};
Expand Down Expand Up @@ -176,6 +176,7 @@ fn do_test_simple_monitor_temporary_update_fail(disconnect: bool) {
let node_cfgs = create_node_cfgs(2, &chanmon_cfgs);
let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]);
let mut nodes = create_network(2, &node_cfgs, &node_chanmgrs);
nodes[0].disable_monitor_completeness_assertion();

let node_a_id = nodes[0].node.get_our_node_id();
let node_b_id = nodes[1].node.get_our_node_id();
Expand Down Expand Up @@ -317,6 +318,7 @@ fn do_test_monitor_temporary_update_fail(disconnect_count: usize) {
let node_cfgs = create_node_cfgs(2, &chanmon_cfgs);
let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]);
let mut nodes = create_network(2, &node_cfgs, &node_chanmgrs);
nodes[0].disable_monitor_completeness_assertion();

let node_a_id = nodes[0].node.get_our_node_id();
let node_b_id = nodes[1].node.get_our_node_id();
Expand Down Expand Up @@ -970,6 +972,7 @@ fn do_test_monitor_update_fail_raa(test_ignore_second_cs: bool) {
let node_cfgs = create_node_cfgs(3, &chanmon_cfgs);
let node_chanmgrs = create_node_chanmgrs(3, &node_cfgs, &[None, None, None]);
let mut nodes = create_network(3, &node_cfgs, &node_chanmgrs);
nodes[1].disable_monitor_completeness_assertion();

let node_a_id = nodes[0].node.get_our_node_id();
let node_b_id = nodes[1].node.get_our_node_id();
Expand Down Expand Up @@ -1501,6 +1504,7 @@ fn claim_while_disconnected_monitor_update_fail() {
let node_cfgs = create_node_cfgs(2, &chanmon_cfgs);
let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]);
let mut nodes = create_network(2, &node_cfgs, &node_chanmgrs);
nodes[1].disable_monitor_completeness_assertion();

let node_a_id = nodes[0].node.get_our_node_id();
let node_b_id = nodes[1].node.get_our_node_id();
Expand Down Expand Up @@ -1728,6 +1732,7 @@ fn first_message_on_recv_ordering() {
let node_cfgs = create_node_cfgs(2, &chanmon_cfgs);
let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]);
let mut nodes = create_network(2, &node_cfgs, &node_chanmgrs);
nodes[1].disable_monitor_completeness_assertion();

let node_a_id = nodes[0].node.get_our_node_id();
let node_b_id = nodes[1].node.get_our_node_id();
Expand Down Expand Up @@ -3850,6 +3855,7 @@ fn do_test_durable_preimages_on_closed_channel(
// Now reload node B
let manager_b = nodes[1].node.encode();
reload_node!(nodes[1], &manager_b, &[&mon_ab, &mon_bc], persister, chain_mon, node_b_reload);
nodes[1].disable_monitor_completeness_assertion();

nodes[0].node.peer_disconnected(node_b_id);
nodes[2].node.peer_disconnected(node_b_id);
Expand Down Expand Up @@ -3899,11 +3905,28 @@ fn do_test_durable_preimages_on_closed_channel(
}
if !close_chans_before_reload {
check_closed_broadcast(&nodes[1], 1, false);
let reason = ClosureReason::CommitmentTxConfirmed;
check_closed_event(&nodes[1], 1, reason, &[node_a_id], 100000);
// When hold=false, get_and_clear_pending_events also triggers
// process_background_events (replaying the preimage and force-close updates)
// and resolves the deferred completions, firing PaymentForwarded alongside
// ChannelClosed. When hold=true, only ChannelClosed fires.
let evs = nodes[1].node.get_and_clear_pending_events();
let expected = if hold_post_reload_mon_update { 1 } else { 2 };
assert_eq!(evs.len(), expected, "{:?}", evs);
assert!(evs.iter().any(|e| matches!(
e,
Event::ChannelClosed { reason: ClosureReason::CommitmentTxConfirmed, .. }
)));
if !hold_post_reload_mon_update {
assert!(evs.iter().any(|e| matches!(e, Event::PaymentForwarded { .. })));
check_added_monitors(&nodes[1], mons_added);
}
}
nodes[1].node.timer_tick_occurred();
check_added_monitors(&nodes[1], mons_added);
// For !close_chans_before_reload && !hold, background events were already replayed
// during get_and_clear_pending_events above, so timer_tick adds no monitors.
let expected_mons =
if !close_chans_before_reload && !hold_post_reload_mon_update { 0 } else { mons_added };
check_added_monitors(&nodes[1], expected_mons);

// Finally, check that B created a payment preimage transaction and close out the payment.
let bs_txn = nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0);
Expand All @@ -3918,44 +3941,61 @@ fn do_test_durable_preimages_on_closed_channel(
check_closed_broadcast(&nodes[0], 1, false);
expect_payment_sent(&nodes[0], payment_preimage, None, true, true);

if close_chans_before_reload && !hold_post_reload_mon_update {
// For close_chans_before_reload with hold=false, the deferred completions
// haven't been processed yet. Trigger process_pending_monitor_events now.
let _ = nodes[1].node.get_and_clear_pending_msg_events();
check_added_monitors(&nodes[1], 0);
}

if !close_chans_before_reload || close_only_a {
// Make sure the B<->C channel is still alive and well by sending a payment over it.
let mut reconnect_args = ReconnectArgs::new(&nodes[1], &nodes[2]);
reconnect_args.pending_responding_commitment_signed.1 = true;
// The B<->C `ChannelMonitorUpdate` shouldn't be allowed to complete, which is the
// equivalent to the responding `commitment_signed` being a duplicate for node B, thus we
// need to set the `pending_responding_commitment_signed_dup` flag.
reconnect_args.pending_responding_commitment_signed_dup_monitor.1 = true;
if hold_post_reload_mon_update {
// When the A-B update is still InProgress, B-C monitor updates are blocked,
// so the responding commitment_signed is a duplicate that generates no update.
reconnect_args.pending_responding_commitment_signed_dup_monitor.1 = true;
}
reconnect_args.pending_raa.1 = true;

reconnect_nodes(reconnect_args);
}

// Once the blocked `ChannelMonitorUpdate` *finally* completes, the pending
// `PaymentForwarded` event will finally be released.
let (_, ab_update_id) = nodes[1].chain_monitor.get_latest_mon_update_id(chan_id_ab);
nodes[1].chain_monitor.chain_monitor.force_channel_monitor_updated(chan_id_ab, ab_update_id);
if hold_post_reload_mon_update {
// When the persister returned InProgress, we need to manually complete the
// A-B monitor update to unblock the PaymentForwarded completion action.
let (_, ab_update_id) = nodes[1].chain_monitor.get_latest_mon_update_id(chan_id_ab);
nodes[1]
.chain_monitor
.chain_monitor
.force_channel_monitor_updated(chan_id_ab, ab_update_id);
}

// If the A<->B channel was closed before we reload, we'll replay the claim against it on
// reload, causing the `PaymentForwarded` event to get replayed.
let evs = nodes[1].node.get_and_clear_pending_events();
assert_eq!(evs.len(), if close_chans_before_reload { 2 } else { 1 });
for ev in evs {
if let Event::PaymentForwarded { claim_from_onchain_tx, next_htlcs, .. } = ev {
if !claim_from_onchain_tx {
// If the outbound channel is still open, the `next_user_channel_id` should be available.
// This was previously broken.
assert!(next_htlcs[0].user_channel_id.is_some())
if !close_chans_before_reload && !hold_post_reload_mon_update {
// PaymentForwarded already fired during get_and_clear_pending_events above.
assert!(evs.is_empty(), "{:?}", evs);
} else {
assert_eq!(evs.len(), if close_chans_before_reload { 2 } else { 1 }, "{:?}", evs);
for ev in evs {
if let Event::PaymentForwarded { claim_from_onchain_tx, next_htlcs, .. } = ev {
if !claim_from_onchain_tx {
assert!(next_htlcs[0].user_channel_id.is_some())
}
} else {
panic!("Unexpected event: {:?}", ev);
}
} else {
panic!();
}
}

if !close_chans_before_reload || close_only_a {
// Once we call `process_pending_events` the final `ChannelMonitor` for the B<->C channel
// will fly, removing the payment preimage from it.
check_added_monitors(&nodes[1], 1);
if hold_post_reload_mon_update {
// The B-C monitor update from the completion action fires now.
check_added_monitors(&nodes[1], 1);
}
assert!(nodes[1].node.get_and_clear_pending_events().is_empty());
send_payment(&nodes[1], &[&nodes[2]], 100_000);
}
Expand Down Expand Up @@ -5415,3 +5455,98 @@ fn test_late_counterparty_commitment_update_after_holder_commitment_spend() {
fn test_late_counterparty_commitment_update_after_holder_commitment_spend_dust() {
do_test_late_counterparty_commitment_update_after_holder_commitment_spend(true);
}

#[test]
fn test_monitor_update_after_funding_spend() {
// Test that monitor updates still work after a funding spend is detected by the
// ChainMonitor but before ChannelManager has processed the corresponding block.
//
// When the counterparty commitment transaction confirms (funding spend), the
// ChannelMonitor sets funding_spend_seen and no_further_updates_allowed() returns
// true. ChainMonitor overrides all subsequent update_channel results to InProgress
// to freeze the channel. These overridden updates complete via deferred completions
// in release_pending_monitor_events, so that MonitorUpdateCompletionActions (like
// PaymentClaimed) can still fire.
let chanmon_cfgs = create_chanmon_cfgs(2);
let node_cfgs = create_node_cfgs(2, &chanmon_cfgs);
let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]);
let nodes = create_network(2, &node_cfgs, &node_chanmgrs);

let node_a_id = nodes[0].node.get_our_node_id();

let (_, _, chan_id, _) = create_announced_chan_between_nodes(&nodes, 0, 1);

// Route payment 1 fully so B can claim it later.
let (payment_preimage_1, payment_hash_1, ..) =
route_payment(&nodes[0], &[&nodes[1]], 1_000_000);

// Get A's commitment tx (this is the "counterparty" commitment from B's perspective).
let as_commitment_tx = get_local_commitment_txn!(nodes[0], chan_id);
assert_eq!(as_commitment_tx.len(), 1);

// Confirm A's commitment tx on B's chain_monitor ONLY (not on B's ChannelManager).
// This sets funding_spend_seen in the monitor, making no_further_updates_allowed() true.
// We also update the best block on the chain_monitor so the broadcaster height is
// consistent when claiming HTLCs.
let (block_hash, height) = nodes[1].best_block_info();
let block = create_dummy_block(block_hash, height + 1, vec![as_commitment_tx[0].clone()]);
let txdata: Vec<_> = block.txdata.iter().enumerate().collect();
nodes[1].chain_monitor.chain_monitor.transactions_confirmed(&block.header, &txdata, height + 1);
nodes[1].chain_monitor.chain_monitor.best_block_updated(&block.header, height + 1);
nodes[1].blocks.lock().unwrap().push((block, height + 1));

// Send payment 2 from A to B.
let (route, payment_hash_2, _, payment_secret_2) =
get_route_and_payment_hash!(&nodes[0], nodes[1], 1_000_000);
nodes[0]
.node
.send_payment_with_route(
route,
payment_hash_2,
RecipientOnionFields::secret_only(payment_secret_2, 1_000_000),
PaymentId(payment_hash_2.0),
)
.unwrap();
check_added_monitors(&nodes[0], 1);

let mut events = nodes[0].node.get_and_clear_pending_msg_events();
assert_eq!(events.len(), 1);
let payment_event = SendEvent::from_event(events.remove(0));

nodes[1].node.handle_update_add_htlc(node_a_id, &payment_event.msgs[0]);

// B processes commitment_signed. The monitor applies the update but returns Err
// because no_further_updates_allowed() is true. ChainMonitor overrides to InProgress,
// freezing the channel.
nodes[1].node.handle_commitment_signed(node_a_id, &payment_event.commitment_msg[0]);
check_added_monitors(&nodes[1], 1);

// B claims payment 1. The preimage monitor update also returns InProgress (deferred),
// so no Completed-while-InProgress assertion fires.
nodes[1].node.claim_funds(payment_preimage_1);
check_added_monitors(&nodes[1], 1);

// First event cycle: the force-close MonitorEvent (CommitmentTxConfirmed) fires first,
// then the deferred completions resolve. The force-close generates a ChannelForceClosed
// update (also deferred), which blocks completion actions. So we only get ChannelClosed.
let events = nodes[1].node.get_and_clear_pending_events();
assert_eq!(events.len(), 1);
match &events[0] {
Event::ChannelClosed { reason: ClosureReason::CommitmentTxConfirmed, .. } => {},
_ => panic!("Unexpected event: {:?}", events[0]),
}
check_added_monitors(&nodes[1], 1);
nodes[1].node.get_and_clear_pending_msg_events();

// Second event cycle: the ChannelForceClosed deferred completion resolves, unblocking
// the PaymentClaimed completion action.
let events = nodes[1].node.get_and_clear_pending_events();
assert_eq!(events.len(), 1);
match &events[0] {
Event::PaymentClaimed { payment_hash, amount_msat, .. } => {
assert_eq!(payment_hash_1, *payment_hash);
assert_eq!(1_000_000, *amount_msat);
},
_ => panic!("Unexpected event: {:?}", events[0]),
}
}
Loading
Loading