diff --git a/CHANGELOG.md b/CHANGELOG.md index db5f6b8ef..a618f6f1f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,32 @@ -# 0.7.0-rc.33 (Synonym Fork) +# 0.7.0-rc.36 (Synonym Fork) ## Bug Fixes +- Fixed orphaned channel migration blocking node startup when the existing monitor + in the KV store can't be deserialized (e.g., `UnknownVersion` from a newer LDK + version). The migration now skips writing and lets the node start normally, + preserving the existing monitor data. +- Fixed HTLC timeout force-close during stale monitor recovery. The healing keysend + created HTLCs with a stale `cltv_expiry` (based on the ChannelManager's outdated + best block height for users offline >24h). When chain sync caught up, LDK + force-closed the channel (HTLCsTimedOut). Fix: sync the chain tip before sending + healing payments so HTLCs get a valid CLTV expiry. If sync fails, skip the keysend + to avoid the stale-CLTV force-close. +- Fixed native crash (SIGABRT) during stale channel monitor recovery. The + `CounterpartyCommitmentSecrets` store was not reset when force-syncing the + monitor's `update_id`, causing `provide_secret()` to fail validation after + a few commitment round-trips. The failed update triggered a + `ChannelMonitorUpdateStatus` mode mismatch panic in the ChannelManager. + Fix: reset the secrets store in `force_set_latest_update_id` so new secrets + build a fresh, consistent tree. (rust-lightning fork change) +- Added `BuildError::DangerousValue` variant to distinguish stale channel monitor failures from + the 19 other `ReadFailed` causes. Apps can now catch this specific error to trigger one-shot + recovery without false positives from unrelated I/O or deserialization errors. +- Added `set_accept_stale_channel_monitors` builder API for recovery from channel monitor desync + (e.g., after migration overwrote newer monitors with stale backup data). When enabled, + force-syncs stale monitor update_ids during build, defers chain sync, and sends probes to + trigger commitment round-trips that heal the monitor state. Depends on a patched rust-lightning + fork (`synonymdev/rust-lightning#0.2.2-accept-stale-monitors`). - Fixed cumulative change-address derivation index leak during fee estimation and dry-run transaction builds. BDK's `TxBuilder::finish()` advances the internal (change) keychain index each time it's called; repeated fee estimations would burn through change addresses without diff --git a/Cargo.toml b/Cargo.toml index 077b4c6d7..fc45137fe 100755 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,7 +4,7 @@ exclude = ["bindings/uniffi-bindgen"] [package] name = "ldk-node" -version = "0.7.0-rc.33" +version = "0.7.0-rc.36" authors = ["Elias Rohrer "] homepage = "https://lightningdevkit.org/" license = "MIT OR Apache-2.0" @@ -123,6 +123,19 @@ check-cfg = [ name = "payments" harness = false +[patch.crates-io] +lightning = { git = "https://github.com/synonymdev/rust-lightning", branch = "0.2.2-accept-stale-monitors" } +lightning-types = { git = "https://github.com/synonymdev/rust-lightning", branch = "0.2.2-accept-stale-monitors" } +lightning-invoice = { git = "https://github.com/synonymdev/rust-lightning", branch = "0.2.2-accept-stale-monitors" } +lightning-net-tokio = { git = "https://github.com/synonymdev/rust-lightning", branch = "0.2.2-accept-stale-monitors" } +lightning-persister = { git = "https://github.com/synonymdev/rust-lightning", branch = "0.2.2-accept-stale-monitors" } +lightning-background-processor = { git = "https://github.com/synonymdev/rust-lightning", branch = "0.2.2-accept-stale-monitors" } +lightning-rapid-gossip-sync = { git = "https://github.com/synonymdev/rust-lightning", branch = "0.2.2-accept-stale-monitors" } +lightning-block-sync = { git = "https://github.com/synonymdev/rust-lightning", branch = "0.2.2-accept-stale-monitors" } +lightning-transaction-sync = { git = "https://github.com/synonymdev/rust-lightning", branch = "0.2.2-accept-stale-monitors" } +lightning-liquidity = { git = "https://github.com/synonymdev/rust-lightning", branch = "0.2.2-accept-stale-monitors" } +lightning-macros = { git = "https://github.com/synonymdev/rust-lightning", branch = "0.2.2-accept-stale-monitors" } + #[patch.crates-io] #lightning = { path = "../rust-lightning/lightning" } #lightning-types = { path = "../rust-lightning/lightning-types" } diff --git a/Package.swift b/Package.swift index df80b2e2f..9d261f9dc 100644 --- a/Package.swift +++ b/Package.swift @@ -3,8 +3,8 @@ import PackageDescription -let tag = "v0.7.0-rc.33" -let checksum = "a6bc32bf63117e80141f9e4cc529d33e16e141460b269125f4150e1251a1108a" +let tag = "v0.7.0-rc.36" +let checksum = "de56fe19149808ccc5e517047ea7bf6b4d5d2c2e33d3ad539ef0155bf1aec8f7" let url = "https://github.com/synonymdev/ldk-node/releases/download/\(tag)/LDKNodeFFI.xcframework.zip" let package = Package( diff --git a/bindings/kotlin/ldk-node-android/gradle.properties b/bindings/kotlin/ldk-node-android/gradle.properties index 456d7a65e..6b8e6a518 100644 --- a/bindings/kotlin/ldk-node-android/gradle.properties +++ b/bindings/kotlin/ldk-node-android/gradle.properties @@ -3,4 +3,4 @@ android.useAndroidX=true android.enableJetifier=true kotlin.code.style=official group=com.synonym -version=0.7.0-rc.33 +version=0.7.0-rc.36 diff --git a/bindings/kotlin/ldk-node-android/lib/src/main/jniLibs/arm64-v8a/libldk_node.so b/bindings/kotlin/ldk-node-android/lib/src/main/jniLibs/arm64-v8a/libldk_node.so index 945ec12b1..b5cd3ffea 100755 Binary files a/bindings/kotlin/ldk-node-android/lib/src/main/jniLibs/arm64-v8a/libldk_node.so and b/bindings/kotlin/ldk-node-android/lib/src/main/jniLibs/arm64-v8a/libldk_node.so differ diff --git a/bindings/kotlin/ldk-node-android/lib/src/main/jniLibs/armeabi-v7a/libldk_node.so b/bindings/kotlin/ldk-node-android/lib/src/main/jniLibs/armeabi-v7a/libldk_node.so index 71ba6c29b..4562ceaa7 100755 Binary files a/bindings/kotlin/ldk-node-android/lib/src/main/jniLibs/armeabi-v7a/libldk_node.so and b/bindings/kotlin/ldk-node-android/lib/src/main/jniLibs/armeabi-v7a/libldk_node.so differ diff --git a/bindings/kotlin/ldk-node-android/lib/src/main/jniLibs/x86_64/libldk_node.so b/bindings/kotlin/ldk-node-android/lib/src/main/jniLibs/x86_64/libldk_node.so index 9841b8bde..4c7768d1d 100755 Binary files a/bindings/kotlin/ldk-node-android/lib/src/main/jniLibs/x86_64/libldk_node.so and b/bindings/kotlin/ldk-node-android/lib/src/main/jniLibs/x86_64/libldk_node.so differ diff --git a/bindings/kotlin/ldk-node-android/lib/src/main/kotlin/org/lightningdevkit/ldknode/ldk_node.android.kt b/bindings/kotlin/ldk-node-android/lib/src/main/kotlin/org/lightningdevkit/ldknode/ldk_node.android.kt index d066f53c8..5a12c371c 100644 --- a/bindings/kotlin/ldk-node-android/lib/src/main/kotlin/org/lightningdevkit/ldknode/ldk_node.android.kt +++ b/bindings/kotlin/ldk-node-android/lib/src/main/kotlin/org/lightningdevkit/ldknode/ldk_node.android.kt @@ -1517,6 +1517,8 @@ internal typealias UniffiVTableCallbackInterfaceVssHeaderProviderUniffiByValue = + + @@ -1971,6 +1973,11 @@ internal interface UniffiLib : Library { `headerProvider`: Pointer?, uniffiCallStatus: UniffiRustCallStatus, ): Pointer? + fun uniffi_ldk_node_fn_method_builder_set_accept_stale_channel_monitors( + `ptr`: Pointer?, + `accept`: Byte, + uniffiCallStatus: UniffiRustCallStatus, + ): Unit fun uniffi_ldk_node_fn_method_builder_set_address_type( `ptr`: Pointer?, `addressType`: RustBufferByValue, @@ -3123,6 +3130,8 @@ internal interface UniffiLib : Library { ): Short fun uniffi_ldk_node_checksum_method_builder_build_with_vss_store_and_header_provider( ): Short + fun uniffi_ldk_node_checksum_method_builder_set_accept_stale_channel_monitors( + ): Short fun uniffi_ldk_node_checksum_method_builder_set_address_type( ): Short fun uniffi_ldk_node_checksum_method_builder_set_address_types_to_monitor( @@ -3614,6 +3623,9 @@ private fun uniffiCheckApiChecksums(lib: UniffiLib) { if (lib.uniffi_ldk_node_checksum_method_builder_build_with_vss_store_and_header_provider() != 9090.toShort()) { throw RuntimeException("UniFFI API checksum mismatch: try cleaning and rebuilding your project") } + if (lib.uniffi_ldk_node_checksum_method_builder_set_accept_stale_channel_monitors() != 25727.toShort()) { + throw RuntimeException("UniFFI API checksum mismatch: try cleaning and rebuilding your project") + } if (lib.uniffi_ldk_node_checksum_method_builder_set_address_type() != 647.toShort()) { throw RuntimeException("UniFFI API checksum mismatch: try cleaning and rebuilding your project") } @@ -5780,6 +5792,18 @@ open class Builder: Disposable, BuilderInterface { }) } + override fun `setAcceptStaleChannelMonitors`(`accept`: kotlin.Boolean) { + callWithPointer { + uniffiRustCall { uniffiRustCallStatus -> + UniffiLib.INSTANCE.uniffi_ldk_node_fn_method_builder_set_accept_stale_channel_monitors( + it, + FfiConverterBoolean.lower(`accept`), + uniffiRustCallStatus, + ) + } + } + } + override fun `setAddressType`(`addressType`: AddressType) { callWithPointer { uniffiRustCall { uniffiRustCallStatus -> @@ -10290,13 +10314,14 @@ object FfiConverterTypeBuildError : FfiConverterRustBuffer { 7 -> BuildException.InvalidNodeAlias(FfiConverterString.read(buf)) 8 -> BuildException.RuntimeSetupFailed(FfiConverterString.read(buf)) 9 -> BuildException.ReadFailed(FfiConverterString.read(buf)) - 10 -> BuildException.WriteFailed(FfiConverterString.read(buf)) - 11 -> BuildException.StoragePathAccessFailed(FfiConverterString.read(buf)) - 12 -> BuildException.KvStoreSetupFailed(FfiConverterString.read(buf)) - 13 -> BuildException.WalletSetupFailed(FfiConverterString.read(buf)) - 14 -> BuildException.LoggerSetupFailed(FfiConverterString.read(buf)) - 15 -> BuildException.NetworkMismatch(FfiConverterString.read(buf)) - 16 -> BuildException.AsyncPaymentsConfigMismatch(FfiConverterString.read(buf)) + 10 -> BuildException.DangerousValue(FfiConverterString.read(buf)) + 11 -> BuildException.WriteFailed(FfiConverterString.read(buf)) + 12 -> BuildException.StoragePathAccessFailed(FfiConverterString.read(buf)) + 13 -> BuildException.KvStoreSetupFailed(FfiConverterString.read(buf)) + 14 -> BuildException.WalletSetupFailed(FfiConverterString.read(buf)) + 15 -> BuildException.LoggerSetupFailed(FfiConverterString.read(buf)) + 16 -> BuildException.NetworkMismatch(FfiConverterString.read(buf)) + 17 -> BuildException.AsyncPaymentsConfigMismatch(FfiConverterString.read(buf)) else -> throw RuntimeException("invalid error enum value, something is very wrong!!") } } @@ -10343,34 +10368,38 @@ object FfiConverterTypeBuildError : FfiConverterRustBuffer { buf.putInt(9) Unit } - is BuildException.WriteFailed -> { + is BuildException.DangerousValue -> { buf.putInt(10) Unit } - is BuildException.StoragePathAccessFailed -> { + is BuildException.WriteFailed -> { buf.putInt(11) Unit } - is BuildException.KvStoreSetupFailed -> { + is BuildException.StoragePathAccessFailed -> { buf.putInt(12) Unit } - is BuildException.WalletSetupFailed -> { + is BuildException.KvStoreSetupFailed -> { buf.putInt(13) Unit } - is BuildException.LoggerSetupFailed -> { + is BuildException.WalletSetupFailed -> { buf.putInt(14) Unit } - is BuildException.NetworkMismatch -> { + is BuildException.LoggerSetupFailed -> { buf.putInt(15) Unit } - is BuildException.AsyncPaymentsConfigMismatch -> { + is BuildException.NetworkMismatch -> { buf.putInt(16) Unit } + is BuildException.AsyncPaymentsConfigMismatch -> { + buf.putInt(17) + Unit + } }.let { /* this makes the `when` an expression, which ensures it is exhaustive */ } } } diff --git a/bindings/kotlin/ldk-node-android/lib/src/main/kotlin/org/lightningdevkit/ldknode/ldk_node.common.kt b/bindings/kotlin/ldk-node-android/lib/src/main/kotlin/org/lightningdevkit/ldknode/ldk_node.common.kt index 8159ef6a5..e79493a9f 100644 --- a/bindings/kotlin/ldk-node-android/lib/src/main/kotlin/org/lightningdevkit/ldknode/ldk_node.common.kt +++ b/bindings/kotlin/ldk-node-android/lib/src/main/kotlin/org/lightningdevkit/ldknode/ldk_node.common.kt @@ -299,6 +299,8 @@ interface BuilderInterface { @Throws(BuildException::class) fun `buildWithVssStoreAndHeaderProvider`(`vssUrl`: kotlin.String, `storeId`: kotlin.String, `headerProvider`: VssHeaderProvider): Node + fun `setAcceptStaleChannelMonitors`(`accept`: kotlin.Boolean) + fun `setAddressType`(`addressType`: AddressType) fun `setAddressTypesToMonitor`(`addressTypesToMonitor`: List) @@ -1294,6 +1296,8 @@ sealed class BuildException(message: String): kotlin.Exception(message) { class ReadFailed(message: String) : BuildException(message) + class DangerousValue(message: String) : BuildException(message) + class WriteFailed(message: String) : BuildException(message) class StoragePathAccessFailed(message: String) : BuildException(message) diff --git a/bindings/kotlin/ldk-node-jvm/gradle.properties b/bindings/kotlin/ldk-node-jvm/gradle.properties index db522e311..e63300c91 100644 --- a/bindings/kotlin/ldk-node-jvm/gradle.properties +++ b/bindings/kotlin/ldk-node-jvm/gradle.properties @@ -1,4 +1,4 @@ org.gradle.jvmargs=-Xmx1536m kotlin.code.style=official group=com.synonym -version=0.7.0-rc.33 +version=0.7.0-rc.36 diff --git a/bindings/kotlin/ldk-node-jvm/lib/src/main/kotlin/org/lightningdevkit/ldknode/ldk_node.common.kt b/bindings/kotlin/ldk-node-jvm/lib/src/main/kotlin/org/lightningdevkit/ldknode/ldk_node.common.kt index 8159ef6a5..e79493a9f 100644 --- a/bindings/kotlin/ldk-node-jvm/lib/src/main/kotlin/org/lightningdevkit/ldknode/ldk_node.common.kt +++ b/bindings/kotlin/ldk-node-jvm/lib/src/main/kotlin/org/lightningdevkit/ldknode/ldk_node.common.kt @@ -299,6 +299,8 @@ interface BuilderInterface { @Throws(BuildException::class) fun `buildWithVssStoreAndHeaderProvider`(`vssUrl`: kotlin.String, `storeId`: kotlin.String, `headerProvider`: VssHeaderProvider): Node + fun `setAcceptStaleChannelMonitors`(`accept`: kotlin.Boolean) + fun `setAddressType`(`addressType`: AddressType) fun `setAddressTypesToMonitor`(`addressTypesToMonitor`: List) @@ -1294,6 +1296,8 @@ sealed class BuildException(message: String): kotlin.Exception(message) { class ReadFailed(message: String) : BuildException(message) + class DangerousValue(message: String) : BuildException(message) + class WriteFailed(message: String) : BuildException(message) class StoragePathAccessFailed(message: String) : BuildException(message) diff --git a/bindings/kotlin/ldk-node-jvm/lib/src/main/kotlin/org/lightningdevkit/ldknode/ldk_node.jvm.kt b/bindings/kotlin/ldk-node-jvm/lib/src/main/kotlin/org/lightningdevkit/ldknode/ldk_node.jvm.kt index f2a9e599b..f61b26797 100644 --- a/bindings/kotlin/ldk-node-jvm/lib/src/main/kotlin/org/lightningdevkit/ldknode/ldk_node.jvm.kt +++ b/bindings/kotlin/ldk-node-jvm/lib/src/main/kotlin/org/lightningdevkit/ldknode/ldk_node.jvm.kt @@ -1515,6 +1515,8 @@ internal typealias UniffiVTableCallbackInterfaceVssHeaderProviderUniffiByValue = + + @@ -1969,6 +1971,11 @@ internal interface UniffiLib : Library { `headerProvider`: Pointer?, uniffiCallStatus: UniffiRustCallStatus, ): Pointer? + fun uniffi_ldk_node_fn_method_builder_set_accept_stale_channel_monitors( + `ptr`: Pointer?, + `accept`: Byte, + uniffiCallStatus: UniffiRustCallStatus, + ): Unit fun uniffi_ldk_node_fn_method_builder_set_address_type( `ptr`: Pointer?, `addressType`: RustBufferByValue, @@ -3121,6 +3128,8 @@ internal interface UniffiLib : Library { ): Short fun uniffi_ldk_node_checksum_method_builder_build_with_vss_store_and_header_provider( ): Short + fun uniffi_ldk_node_checksum_method_builder_set_accept_stale_channel_monitors( + ): Short fun uniffi_ldk_node_checksum_method_builder_set_address_type( ): Short fun uniffi_ldk_node_checksum_method_builder_set_address_types_to_monitor( @@ -3612,6 +3621,9 @@ private fun uniffiCheckApiChecksums(lib: UniffiLib) { if (lib.uniffi_ldk_node_checksum_method_builder_build_with_vss_store_and_header_provider() != 9090.toShort()) { throw RuntimeException("UniFFI API checksum mismatch: try cleaning and rebuilding your project") } + if (lib.uniffi_ldk_node_checksum_method_builder_set_accept_stale_channel_monitors() != 25727.toShort()) { + throw RuntimeException("UniFFI API checksum mismatch: try cleaning and rebuilding your project") + } if (lib.uniffi_ldk_node_checksum_method_builder_set_address_type() != 647.toShort()) { throw RuntimeException("UniFFI API checksum mismatch: try cleaning and rebuilding your project") } @@ -5769,6 +5781,18 @@ open class Builder: Disposable, BuilderInterface { }) } + override fun `setAcceptStaleChannelMonitors`(`accept`: kotlin.Boolean) { + callWithPointer { + uniffiRustCall { uniffiRustCallStatus -> + UniffiLib.INSTANCE.uniffi_ldk_node_fn_method_builder_set_accept_stale_channel_monitors( + it, + FfiConverterBoolean.lower(`accept`), + uniffiRustCallStatus, + ) + } + } + } + override fun `setAddressType`(`addressType`: AddressType) { callWithPointer { uniffiRustCall { uniffiRustCallStatus -> @@ -10279,13 +10303,14 @@ object FfiConverterTypeBuildError : FfiConverterRustBuffer { 7 -> BuildException.InvalidNodeAlias(FfiConverterString.read(buf)) 8 -> BuildException.RuntimeSetupFailed(FfiConverterString.read(buf)) 9 -> BuildException.ReadFailed(FfiConverterString.read(buf)) - 10 -> BuildException.WriteFailed(FfiConverterString.read(buf)) - 11 -> BuildException.StoragePathAccessFailed(FfiConverterString.read(buf)) - 12 -> BuildException.KvStoreSetupFailed(FfiConverterString.read(buf)) - 13 -> BuildException.WalletSetupFailed(FfiConverterString.read(buf)) - 14 -> BuildException.LoggerSetupFailed(FfiConverterString.read(buf)) - 15 -> BuildException.NetworkMismatch(FfiConverterString.read(buf)) - 16 -> BuildException.AsyncPaymentsConfigMismatch(FfiConverterString.read(buf)) + 10 -> BuildException.DangerousValue(FfiConverterString.read(buf)) + 11 -> BuildException.WriteFailed(FfiConverterString.read(buf)) + 12 -> BuildException.StoragePathAccessFailed(FfiConverterString.read(buf)) + 13 -> BuildException.KvStoreSetupFailed(FfiConverterString.read(buf)) + 14 -> BuildException.WalletSetupFailed(FfiConverterString.read(buf)) + 15 -> BuildException.LoggerSetupFailed(FfiConverterString.read(buf)) + 16 -> BuildException.NetworkMismatch(FfiConverterString.read(buf)) + 17 -> BuildException.AsyncPaymentsConfigMismatch(FfiConverterString.read(buf)) else -> throw RuntimeException("invalid error enum value, something is very wrong!!") } } @@ -10332,34 +10357,38 @@ object FfiConverterTypeBuildError : FfiConverterRustBuffer { buf.putInt(9) Unit } - is BuildException.WriteFailed -> { + is BuildException.DangerousValue -> { buf.putInt(10) Unit } - is BuildException.StoragePathAccessFailed -> { + is BuildException.WriteFailed -> { buf.putInt(11) Unit } - is BuildException.KvStoreSetupFailed -> { + is BuildException.StoragePathAccessFailed -> { buf.putInt(12) Unit } - is BuildException.WalletSetupFailed -> { + is BuildException.KvStoreSetupFailed -> { buf.putInt(13) Unit } - is BuildException.LoggerSetupFailed -> { + is BuildException.WalletSetupFailed -> { buf.putInt(14) Unit } - is BuildException.NetworkMismatch -> { + is BuildException.LoggerSetupFailed -> { buf.putInt(15) Unit } - is BuildException.AsyncPaymentsConfigMismatch -> { + is BuildException.NetworkMismatch -> { buf.putInt(16) Unit } + is BuildException.AsyncPaymentsConfigMismatch -> { + buf.putInt(17) + Unit + } }.let { /* this makes the `when` an expression, which ensures it is exhaustive */ } } } diff --git a/bindings/kotlin/ldk-node-jvm/lib/src/main/resources/darwin-aarch64/libldk_node.dylib b/bindings/kotlin/ldk-node-jvm/lib/src/main/resources/darwin-aarch64/libldk_node.dylib index 2051b151b..e5bfee794 100644 Binary files a/bindings/kotlin/ldk-node-jvm/lib/src/main/resources/darwin-aarch64/libldk_node.dylib and b/bindings/kotlin/ldk-node-jvm/lib/src/main/resources/darwin-aarch64/libldk_node.dylib differ diff --git a/bindings/kotlin/ldk-node-jvm/lib/src/main/resources/darwin-x86-64/libldk_node.dylib b/bindings/kotlin/ldk-node-jvm/lib/src/main/resources/darwin-x86-64/libldk_node.dylib index bb880a77c..ad19d4d19 100644 Binary files a/bindings/kotlin/ldk-node-jvm/lib/src/main/resources/darwin-x86-64/libldk_node.dylib and b/bindings/kotlin/ldk-node-jvm/lib/src/main/resources/darwin-x86-64/libldk_node.dylib differ diff --git a/bindings/ldk_node.udl b/bindings/ldk_node.udl index e22ac7f67..6f2f57380 100644 --- a/bindings/ldk_node.udl +++ b/bindings/ldk_node.udl @@ -110,6 +110,7 @@ interface Builder { void set_entropy_seed_bytes(sequence seed_bytes); void set_entropy_bip39_mnemonic(Mnemonic mnemonic, string? passphrase); void set_channel_data_migration(ChannelDataMigration migration); + void set_accept_stale_channel_monitors(boolean accept); void set_chain_source_esplora(string server_url, EsploraSyncConfig? config); void set_chain_source_electrum(string server_url, ElectrumSyncConfig? config); void set_chain_source_bitcoind_rpc(string rpc_host, u16 rpc_port, string rpc_user, string rpc_password); @@ -457,6 +458,7 @@ enum BuildError { "InvalidNodeAlias", "RuntimeSetupFailed", "ReadFailed", + "DangerousValue", "WriteFailed", "StoragePathAccessFailed", "KVStoreSetupFailed", diff --git a/bindings/python/pyproject.toml b/bindings/python/pyproject.toml index a28a2034f..9e72b7a6c 100644 --- a/bindings/python/pyproject.toml +++ b/bindings/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "ldk_node" -version = "0.7.0-rc.33" +version = "0.7.0-rc.36" authors = [ { name="Elias Rohrer", email="dev@tnull.de" }, ] diff --git a/bindings/python/src/ldk_node/ldk_node.py b/bindings/python/src/ldk_node/ldk_node.py index 72b34dd51..22fc4ff43 100644 --- a/bindings/python/src/ldk_node/ldk_node.py +++ b/bindings/python/src/ldk_node/ldk_node.py @@ -601,6 +601,8 @@ def _uniffi_check_api_checksums(lib): raise InternalError("UniFFI API checksum mismatch: try cleaning and rebuilding your project") if lib.uniffi_ldk_node_checksum_method_builder_build_with_vss_store_and_header_provider() != 9090: raise InternalError("UniFFI API checksum mismatch: try cleaning and rebuilding your project") + if lib.uniffi_ldk_node_checksum_method_builder_set_accept_stale_channel_monitors() != 25727: + raise InternalError("UniFFI API checksum mismatch: try cleaning and rebuilding your project") if lib.uniffi_ldk_node_checksum_method_builder_set_address_type() != 647: raise InternalError("UniFFI API checksum mismatch: try cleaning and rebuilding your project") if lib.uniffi_ldk_node_checksum_method_builder_set_address_types_to_monitor() != 23561: @@ -1486,6 +1488,12 @@ class _UniffiVTableCallbackInterfaceVssHeaderProvider(ctypes.Structure): ctypes.POINTER(_UniffiRustCallStatus), ) _UniffiLib.uniffi_ldk_node_fn_method_builder_build_with_vss_store_and_header_provider.restype = ctypes.c_void_p +_UniffiLib.uniffi_ldk_node_fn_method_builder_set_accept_stale_channel_monitors.argtypes = ( + ctypes.c_void_p, + ctypes.c_int8, + ctypes.POINTER(_UniffiRustCallStatus), +) +_UniffiLib.uniffi_ldk_node_fn_method_builder_set_accept_stale_channel_monitors.restype = None _UniffiLib.uniffi_ldk_node_fn_method_builder_set_address_type.argtypes = ( ctypes.c_void_p, _UniffiRustBuffer, @@ -2927,6 +2935,9 @@ class _UniffiVTableCallbackInterfaceVssHeaderProvider(ctypes.Structure): _UniffiLib.uniffi_ldk_node_checksum_method_builder_build_with_vss_store_and_header_provider.argtypes = ( ) _UniffiLib.uniffi_ldk_node_checksum_method_builder_build_with_vss_store_and_header_provider.restype = ctypes.c_uint16 +_UniffiLib.uniffi_ldk_node_checksum_method_builder_set_accept_stale_channel_monitors.argtypes = ( +) +_UniffiLib.uniffi_ldk_node_checksum_method_builder_set_accept_stale_channel_monitors.restype = ctypes.c_uint16 _UniffiLib.uniffi_ldk_node_checksum_method_builder_set_address_type.argtypes = ( ) _UniffiLib.uniffi_ldk_node_checksum_method_builder_set_address_type.restype = ctypes.c_uint16 @@ -4642,6 +4653,8 @@ def build_with_vss_store_and_fixed_headers(self, vss_url: "str",store_id: "str", raise NotImplementedError def build_with_vss_store_and_header_provider(self, vss_url: "str",store_id: "str",header_provider: "VssHeaderProvider"): raise NotImplementedError + def set_accept_stale_channel_monitors(self, accept: "bool"): + raise NotImplementedError def set_address_type(self, address_type: "AddressType"): raise NotImplementedError def set_address_types_to_monitor(self, address_types_to_monitor: "typing.List[AddressType]"): @@ -4800,6 +4813,17 @@ def build_with_vss_store_and_header_provider(self, vss_url: "str",store_id: "str + def set_accept_stale_channel_monitors(self, accept: "bool") -> None: + _UniffiConverterBool.check_lower(accept) + + _uniffi_rust_call(_UniffiLib.uniffi_ldk_node_fn_method_builder_set_accept_stale_channel_monitors,self._uniffi_clone_pointer(), + _UniffiConverterBool.lower(accept)) + + + + + + def set_address_type(self, address_type: "AddressType") -> None: _UniffiConverterTypeAddressType.check_lower(address_type) @@ -9787,6 +9811,11 @@ class ReadFailed(_UniffiTempBuildError): def __repr__(self): return "BuildError.ReadFailed({})".format(repr(str(self))) _UniffiTempBuildError.ReadFailed = ReadFailed # type: ignore + class DangerousValue(_UniffiTempBuildError): + + def __repr__(self): + return "BuildError.DangerousValue({})".format(repr(str(self))) + _UniffiTempBuildError.DangerousValue = DangerousValue # type: ignore class WriteFailed(_UniffiTempBuildError): def __repr__(self): @@ -9868,30 +9897,34 @@ def read(buf): _UniffiConverterString.read(buf), ) if variant == 10: - return BuildError.WriteFailed( + return BuildError.DangerousValue( _UniffiConverterString.read(buf), ) if variant == 11: - return BuildError.StoragePathAccessFailed( + return BuildError.WriteFailed( _UniffiConverterString.read(buf), ) if variant == 12: - return BuildError.KvStoreSetupFailed( + return BuildError.StoragePathAccessFailed( _UniffiConverterString.read(buf), ) if variant == 13: - return BuildError.WalletSetupFailed( + return BuildError.KvStoreSetupFailed( _UniffiConverterString.read(buf), ) if variant == 14: - return BuildError.LoggerSetupFailed( + return BuildError.WalletSetupFailed( _UniffiConverterString.read(buf), ) if variant == 15: - return BuildError.NetworkMismatch( + return BuildError.LoggerSetupFailed( _UniffiConverterString.read(buf), ) if variant == 16: + return BuildError.NetworkMismatch( + _UniffiConverterString.read(buf), + ) + if variant == 17: return BuildError.AsyncPaymentsConfigMismatch( _UniffiConverterString.read(buf), ) @@ -9917,6 +9950,8 @@ def check_lower(value): return if isinstance(value, BuildError.ReadFailed): return + if isinstance(value, BuildError.DangerousValue): + return if isinstance(value, BuildError.WriteFailed): return if isinstance(value, BuildError.StoragePathAccessFailed): @@ -9952,20 +9987,22 @@ def write(value, buf): buf.write_i32(8) if isinstance(value, BuildError.ReadFailed): buf.write_i32(9) - if isinstance(value, BuildError.WriteFailed): + if isinstance(value, BuildError.DangerousValue): buf.write_i32(10) - if isinstance(value, BuildError.StoragePathAccessFailed): + if isinstance(value, BuildError.WriteFailed): buf.write_i32(11) - if isinstance(value, BuildError.KvStoreSetupFailed): + if isinstance(value, BuildError.StoragePathAccessFailed): buf.write_i32(12) - if isinstance(value, BuildError.WalletSetupFailed): + if isinstance(value, BuildError.KvStoreSetupFailed): buf.write_i32(13) - if isinstance(value, BuildError.LoggerSetupFailed): + if isinstance(value, BuildError.WalletSetupFailed): buf.write_i32(14) - if isinstance(value, BuildError.NetworkMismatch): + if isinstance(value, BuildError.LoggerSetupFailed): buf.write_i32(15) - if isinstance(value, BuildError.AsyncPaymentsConfigMismatch): + if isinstance(value, BuildError.NetworkMismatch): buf.write_i32(16) + if isinstance(value, BuildError.AsyncPaymentsConfigMismatch): + buf.write_i32(17) diff --git a/bindings/swift/Sources/LDKNode/LDKNode.swift b/bindings/swift/Sources/LDKNode/LDKNode.swift index 24a98dfc7..12dc76ce2 100644 --- a/bindings/swift/Sources/LDKNode/LDKNode.swift +++ b/bindings/swift/Sources/LDKNode/LDKNode.swift @@ -1567,6 +1567,8 @@ public protocol BuilderProtocol: AnyObject { func buildWithVssStoreAndHeaderProvider(vssUrl: String, storeId: String, headerProvider: VssHeaderProvider) throws -> Node + func setAcceptStaleChannelMonitors(accept: Bool) + func setAddressType(addressType: AddressType) func setAddressTypesToMonitor(addressTypesToMonitor: [AddressType]) @@ -1719,6 +1721,13 @@ open class Builder: }) } + open func setAcceptStaleChannelMonitors(accept: Bool) { + try! rustCall { + uniffi_ldk_node_fn_method_builder_set_accept_stale_channel_monitors(self.uniffiClonePointer(), + FfiConverterBool.lower(accept), $0) + } + } + open func setAddressType(addressType: AddressType) { try! rustCall { uniffi_ldk_node_fn_method_builder_set_address_type(self.uniffiClonePointer(), @@ -7396,6 +7405,8 @@ public enum BuildError { case ReadFailed(message: String) + case DangerousValue(message: String) + case WriteFailed(message: String) case StoragePathAccessFailed(message: String) @@ -7456,31 +7467,35 @@ public struct FfiConverterTypeBuildError: FfiConverterRustBuffer { message: FfiConverterString.read(from: &buf) ) - case 10: return try .WriteFailed( + case 10: return try .DangerousValue( message: FfiConverterString.read(from: &buf) ) - case 11: return try .StoragePathAccessFailed( + case 11: return try .WriteFailed( message: FfiConverterString.read(from: &buf) ) - case 12: return try .KvStoreSetupFailed( + case 12: return try .StoragePathAccessFailed( message: FfiConverterString.read(from: &buf) ) - case 13: return try .WalletSetupFailed( + case 13: return try .KvStoreSetupFailed( message: FfiConverterString.read(from: &buf) ) - case 14: return try .LoggerSetupFailed( + case 14: return try .WalletSetupFailed( message: FfiConverterString.read(from: &buf) ) - case 15: return try .NetworkMismatch( + case 15: return try .LoggerSetupFailed( message: FfiConverterString.read(from: &buf) ) - case 16: return try .AsyncPaymentsConfigMismatch( + case 16: return try .NetworkMismatch( + message: FfiConverterString.read(from: &buf) + ) + + case 17: return try .AsyncPaymentsConfigMismatch( message: FfiConverterString.read(from: &buf) ) @@ -7508,20 +7523,22 @@ public struct FfiConverterTypeBuildError: FfiConverterRustBuffer { writeInt(&buf, Int32(8)) case .ReadFailed(_ /* message is ignored*/ ): writeInt(&buf, Int32(9)) - case .WriteFailed(_ /* message is ignored*/ ): + case .DangerousValue(_ /* message is ignored*/ ): writeInt(&buf, Int32(10)) - case .StoragePathAccessFailed(_ /* message is ignored*/ ): + case .WriteFailed(_ /* message is ignored*/ ): writeInt(&buf, Int32(11)) - case .KvStoreSetupFailed(_ /* message is ignored*/ ): + case .StoragePathAccessFailed(_ /* message is ignored*/ ): writeInt(&buf, Int32(12)) - case .WalletSetupFailed(_ /* message is ignored*/ ): + case .KvStoreSetupFailed(_ /* message is ignored*/ ): writeInt(&buf, Int32(13)) - case .LoggerSetupFailed(_ /* message is ignored*/ ): + case .WalletSetupFailed(_ /* message is ignored*/ ): writeInt(&buf, Int32(14)) - case .NetworkMismatch(_ /* message is ignored*/ ): + case .LoggerSetupFailed(_ /* message is ignored*/ ): writeInt(&buf, Int32(15)) - case .AsyncPaymentsConfigMismatch(_ /* message is ignored*/ ): + case .NetworkMismatch(_ /* message is ignored*/ ): writeInt(&buf, Int32(16)) + case .AsyncPaymentsConfigMismatch(_ /* message is ignored*/ ): + writeInt(&buf, Int32(17)) } } } @@ -12363,6 +12380,9 @@ private var initializationResult: InitializationResult = { if uniffi_ldk_node_checksum_method_builder_build_with_vss_store_and_header_provider() != 9090 { return InitializationResult.apiChecksumMismatch } + if uniffi_ldk_node_checksum_method_builder_set_accept_stale_channel_monitors() != 25727 { + return InitializationResult.apiChecksumMismatch + } if uniffi_ldk_node_checksum_method_builder_set_address_type() != 647 { return InitializationResult.apiChecksumMismatch } diff --git a/src/builder.rs b/src/builder.rs index f5b6ce6d7..00269e04b 100644 --- a/src/builder.rs +++ b/src/builder.rs @@ -9,7 +9,7 @@ use std::collections::HashMap; use std::convert::TryInto; use std::default::Default; use std::path::PathBuf; -use std::sync::atomic::AtomicU32; +use std::sync::atomic::{AtomicBool, AtomicU32}; use std::sync::{Arc, Mutex, Once, RwLock}; use std::time::SystemTime; use std::{fmt, fs}; @@ -202,6 +202,11 @@ pub enum BuildError { /// /// [`KVStore`]: lightning::util::persist::KVStoreSync ReadFailed, + /// The deserialized channel data would be dangerous to use, typically because + /// channel monitors are stale compared to the channel manager. + /// + /// Use [`NodeBuilder::set_accept_stale_channel_monitors`] to recover. + DangerousValue, /// We failed to write data to the [`KVStore`]. /// /// [`KVStore`]: lightning::util::persist::KVStoreSync @@ -239,6 +244,11 @@ impl fmt::Display for BuildError { }, Self::RuntimeSetupFailed => write!(f, "Failed to setup a runtime."), Self::ReadFailed => write!(f, "Failed to read from store."), + Self::DangerousValue => write!( + f, + "Deserialized channel data is dangerous to use (stale channel monitors). \ + Use set_accept_stale_channel_monitors(true) to recover." + ), Self::WriteFailed => write!(f, "Failed to write to store."), Self::StoragePathAccessFailed => write!(f, "Failed to access the given storage path."), Self::KVStoreSetupFailed => write!(f, "Failed to setup KVStore."), @@ -279,6 +289,7 @@ pub struct NodeBuilder { runtime_handle: Option, pathfinding_scores_sync_config: Option, channel_data_migration: Option, + accept_stale_channel_monitors: bool, } impl NodeBuilder { @@ -309,6 +320,7 @@ impl NodeBuilder { async_payments_role: None, pathfinding_scores_sync_config, channel_data_migration, + accept_stale_channel_monitors: false, } } @@ -361,6 +373,19 @@ impl NodeBuilder { self } + /// Accept stale channel monitors on startup instead of failing with `DangerousValue`. + /// + /// When enabled, stale monitors have their `update_id` force-synced to match the + /// `ChannelManager`. The monitor's commitment state remains stale until the next real + /// channel update (e.g. a fee update round-trip after reconnecting to the peer). + /// + /// Use this for recovery after monitor data was overwritten by a migration or backup restore. + /// Chain sync should be delayed until monitors are healed via a commitment round-trip. + pub fn set_accept_stale_channel_monitors(&mut self, accept: bool) -> &mut Self { + self.accept_stale_channel_monitors = accept; + self + } + /// Configures the [`Node`] instance to source its chain data from the given Esplora server. /// /// If no `sync_config` is given, default values are used. See [`EsploraSyncConfig`] for more @@ -813,6 +838,7 @@ impl NodeBuilder { logger, Arc::new(vss_store), self.channel_data_migration.as_ref(), + self.accept_stale_channel_monitors, ) } @@ -848,6 +874,7 @@ impl NodeBuilder { logger, kv_store, self.channel_data_migration.as_ref(), + self.accept_stale_channel_monitors, ) } } @@ -917,6 +944,13 @@ impl ArcedNodeBuilder { self.inner.write().unwrap().set_channel_data_migration(migration); } + /// Accept stale channel monitors on startup instead of failing. + /// + /// See [`NodeBuilder::set_accept_stale_channel_monitors`] for details. + pub fn set_accept_stale_channel_monitors(&self, accept: bool) { + self.inner.write().unwrap().set_accept_stale_channel_monitors(accept); + } + /// Configures the [`Node`] instance to source its chain data from the given Esplora server. /// /// If no `sync_config` is given, default values are used. See [`EsploraSyncConfig`] for more @@ -1445,13 +1479,13 @@ where } }, Err(e) => { - log_error!( + log_warn!( logger, - "Failed to deserialize existing monitor {}, refusing migration write to avoid overwriting potentially newer state: {:?}", + "Failed to deserialize existing monitor {}, skipping migration to avoid overwriting potentially newer state: {:?}", monitor_key, e ); - return Err(BuildError::ReadFailed); + false }, } }, @@ -1498,7 +1532,7 @@ fn build_with_store_internal( pathfinding_scores_sync_config: Option<&PathfindingScoresSyncConfig>, async_payments_role: Option, seed_bytes: [u8; 64], runtime: Arc, logger: Arc, kv_store: Arc, - channel_data_migration: Option<&ChannelDataMigration>, + channel_data_migration: Option<&ChannelDataMigration>, accept_stale_channel_monitors: bool, ) -> Result { optionally_install_rustls_cryptoprovider(); @@ -1972,7 +2006,7 @@ fn build_with_store_internal( let mut reader = Cursor::new(res); let channel_monitor_references = channel_monitors.iter().map(|(_, chanmon)| chanmon).collect(); - let read_args = ChannelManagerReadArgs::new( + let mut read_args = ChannelManagerReadArgs::new( Arc::clone(&keys_manager), Arc::clone(&keys_manager), Arc::clone(&keys_manager), @@ -1985,10 +2019,22 @@ fn build_with_store_internal( user_config, channel_monitor_references, ); + read_args.accept_stale_channel_monitors = accept_stale_channel_monitors; let (_hash, channel_manager) = <(BlockHash, ChannelManager)>::read(&mut reader, read_args).map_err(|e| { - log_error!(logger, "Failed to read channel manager from store: {}", e); - BuildError::ReadFailed + if matches!(e, lightning::ln::msgs::DecodeError::DangerousValue) { + log_error!( + logger, + "Channel manager deserialization returned DangerousValue \ + (stale channel monitors). \ + Use set_accept_stale_channel_monitors(true) to recover: {}", + e + ); + BuildError::DangerousValue + } else { + log_error!(logger, "Failed to read channel manager from store: {}", e); + BuildError::ReadFailed + } })?; channel_manager } else { @@ -2346,6 +2392,7 @@ fn build_with_store_internal( async_payments_role, runtime_sync_intervals: Arc::new(RwLock::new(RuntimeSyncIntervals::default())), local_rgs_timestamp, + accept_stale_channel_monitors: AtomicBool::new(accept_stale_channel_monitors), }) } @@ -2696,10 +2743,12 @@ mod tests { } #[test] - fn test_migration_fails_on_corrupt_existing_data() { + fn test_migration_skips_on_corrupt_existing_data() { let (monitor_bytes, monitor_key, _, seed) = create_test_monitor_bytes(); let (store, keys_manager, logger, runtime) = make_test_deps(&seed); + let corrupt_data = vec![0xFF, 0xFE, 0xFD, 0xFC]; + // Pre-populate the store with garbage data for this monitor key. runtime .block_on(KVStore::write( @@ -2707,18 +2756,29 @@ mod tests { CHANNEL_MONITOR_PERSISTENCE_PRIMARY_NAMESPACE, CHANNEL_MONITOR_PERSISTENCE_SECONDARY_NAMESPACE, &monitor_key, - vec![0xFF, 0xFE, 0xFD, 0xFC], + corrupt_data.clone(), )) .unwrap(); - // Migration should fail because the existing data can't be deserialized - // (fail-closed to avoid overwriting potentially newer state). + // Migration should skip (not fail) when existing data can't be deserialized, + // to avoid blocking node startup while still not overwriting existing state. let migration = ChannelDataMigration { channel_manager: None, channel_monitors: vec![monitor_bytes] }; let result = apply_channel_data_migration(&migration, &store, &keys_manager, &logger, &runtime); - assert_eq!(result, Err(BuildError::ReadFailed)); + assert_eq!(result, Ok(())); + + // Verify the corrupt data was NOT overwritten — existing state preserved. + let stored = runtime + .block_on(KVStore::read( + &*store, + CHANNEL_MONITOR_PERSISTENCE_PRIMARY_NAMESPACE, + CHANNEL_MONITOR_PERSISTENCE_SECONDARY_NAMESPACE, + &monitor_key, + )) + .unwrap(); + assert_eq!(stored, corrupt_data); } #[test] diff --git a/src/lib.rs b/src/lib.rs index 8e3bbf7c8..7300bf411 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -106,7 +106,7 @@ use std::collections::HashMap; use std::default::Default; use std::net::ToSocketAddrs; use std::ops::Deref; -use std::sync::atomic::AtomicU32; +use std::sync::atomic::{AtomicBool, AtomicU32, Ordering}; use std::sync::{Arc, Mutex, RwLock}; use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH}; @@ -143,11 +143,13 @@ use lightning::events::bump_transaction::{Input, Wallet as LdkWallet}; use lightning::impl_writeable_tlv_based; use lightning::ln::chan_utils::{make_funding_redeemscript, FUNDING_TRANSACTION_WITNESS_WEIGHT}; use lightning::ln::channel_state::{ChannelDetails as LdkChannelDetails, ChannelShutdownState}; -use lightning::ln::channelmanager::PaymentId; +use lightning::ln::channelmanager::{PaymentId, RecipientOnionFields, Retry}; use lightning::ln::funding::SpliceContribution; use lightning::ln::msgs::SocketAddress; use lightning::ln::types::ChannelId; use lightning::routing::gossip::NodeAlias; +use lightning::routing::router::{PaymentParameters, RouteParameters}; +use lightning::sign::EntropySource; use lightning::util::persist::KVStoreSync; use lightning_background_processor::process_events_async; use liquidity::{LSPS1Liquidity, LiquiditySource}; @@ -217,6 +219,7 @@ pub struct Node { runtime_sync_intervals: Arc>, /// Shared RGS timestamp used by LocalGraphStore to persist the timestamp alongside the graph. local_rgs_timestamp: Arc, + accept_stale_channel_monitors: AtomicBool, } impl Node { @@ -255,24 +258,13 @@ impl Node { // Set event queue for onchain event emission self.chain_source.set_event_queue(Arc::clone(&self.event_queue)); - // Spawn background task continuously syncing onchain, lightning, and fee rate cache. - let stop_sync_receiver = self.stop_sender.subscribe(); - let chain_source = Arc::clone(&self.chain_source); - let sync_wallet = Arc::clone(&self.wallet); - let sync_cman = Arc::clone(&self.channel_manager); - let sync_cmon = Arc::clone(&self.chain_monitor); - let sync_sweeper = Arc::clone(&self.output_sweeper); - self.runtime.spawn_background_task(async move { - chain_source - .continuously_sync_wallets( - stop_sync_receiver, - sync_wallet, - sync_cman, - sync_cmon, - sync_sweeper, - ) - .await; - }); + // When recovering stale monitors, defer chain sync until after the background + // processor and peer connections have had time to heal the monitors via a + // commitment round-trip (triggered by fee update after timer_tick). + let defer_chain_sync = self.accept_stale_channel_monitors.load(Ordering::Relaxed); + if !defer_chain_sync { + self.spawn_chain_sync_task(); + } if self.gossip_source.is_rgs() { let gossip_source = Arc::clone(&self.gossip_source); @@ -658,11 +650,277 @@ impl Node { }); } + if defer_chain_sync { + // Mark node as running and release the lock BEFORE the healing block, + // so stop() can be called to abort startup if needed. + *is_running_lock = true; + drop(is_running_lock); + + // Stale monitor recovery: the background processor and peer connections are now + // running. We need to trigger a commitment round-trip on each channel to heal + // the stale monitors before processing any on-chain events. + log_info!( + self.logger, + "Stale monitor recovery: triggering commitment round-trips to heal monitors \ + before starting chain sync..." + ); + + // Trigger timer_tick to queue any fee updates (works for funder channels). + self.channel_manager.timer_tick_occurred(); + + let channel_manager = Arc::clone(&self.channel_manager); + let chain_monitor = Arc::clone(&self.chain_monitor); + let keys_manager = Arc::clone(&self.keys_manager); + let chain_source = Arc::clone(&self.chain_source); + let sync_sweeper = Arc::clone(&self.output_sweeper); + let heal_logger = Arc::clone(&self.logger); + let mut stop_healing = self.stop_sender.subscribe(); + self.runtime.block_on(async move { + // Record initial monitor update_ids to detect when they advance. + let initial_update_ids: Vec<_> = channel_manager + .list_channels() + .iter() + .filter(|c| c.is_channel_ready) + .filter_map(|c| { + chain_monitor.get_monitor(c.channel_id).ok().map(|m| { + (c.channel_id, c.counterparty.node_id, m.get_latest_update_id()) + }) + }) + .collect(); + + if initial_update_ids.is_empty() { + log_info!(heal_logger, "Stale monitor recovery: no active channels to heal."); + return; + } + + log_info!( + heal_logger, + "Stale monitor recovery: tracking {} channel(s) for healing.", + initial_update_ids.len() + ); + + // Give peers time to connect and complete channel_reestablish. + tokio::select! { + _ = stop_healing.changed() => { + log_info!(heal_logger, "Stale monitor recovery: cancelled by shutdown."); + return; + } + _ = tokio::time::sleep(Duration::from_secs(5)) => {} + } + + // Sync chain tip before sending healing payments. This updates the + // ChannelManager's best block height so the keysend HTLC gets a current + // CLTV expiry. Without this, users offline >24h would get an already-expired + // HTLC that triggers a force-close when chain sync later catches up. + log_info!(heal_logger, "Stale monitor recovery: syncing chain tip..."); + let chain_synced = match chain_source + .sync_lightning_wallet( + Arc::clone(&channel_manager), + Arc::clone(&chain_monitor), + Arc::clone(&sync_sweeper), + ) + .await + { + Ok(()) => { + log_info!(heal_logger, "Stale monitor recovery: chain tip synced."); + true + }, + Err(e) => { + log_error!( + heal_logger, + "Stale monitor recovery: chain sync failed: {}. \ + Skipping healing payments to avoid stale CLTV.", + e + ); + false + }, + }; + + // Send 1-sat keysend payments to trigger commitment round-trips. + // We use real payments (not probes) because LDK rejects single-hop probes. + // The HTLC add/fail cycle triggers commitment_signed exchanges that heal + // the monitor. Cost: 1 sat per counterparty if keysend succeeds. + // Only send if chain sync succeeded — stale CLTV would force-close. + let send_heal_payment = |node_id: bitcoin::secp256k1::PublicKey| { + let payment_id = PaymentId(keys_manager.get_secure_random_bytes()); + let mut route_params = RouteParameters::from_payment_params_and_value( + PaymentParameters::from_node_id(node_id, 144), + 1_000, // 1 sat + ); + // Force direct route only — prevent routing through a different channel + // which would heal the wrong monitor. + route_params.max_total_routing_fee_msat = Some(0); + channel_manager.send_spontaneous_payment( + None, + RecipientOnionFields::spontaneous_empty(), + payment_id, + route_params, + Retry::Attempts(0), + ) + }; + + if chain_synced { + // Send one healing payment per unhealed channel. Note: for multiple + // channels with the same peer, the router may pick the same channel for + // both payments. The retry loop gives multiple chances for the router to + // select different channels as scores and capacity shift between attempts. + for (_, counterparty_node_id, _) in &initial_update_ids { + match send_heal_payment(*counterparty_node_id) { + Ok(_) => { + log_info!( + heal_logger, + "Stale monitor recovery: sent healing payment to {}", + counterparty_node_id + ); + }, + Err(e) => { + log_error!( + heal_logger, + "Stale monitor recovery: failed to send healing payment to {}: {:?}", + counterparty_node_id, + e + ); + }, + } + } + } // chain_synced + + // Poll monitor update_ids until all have advanced (healed) or timeout. + // Retry payments every 10s for channels that haven't healed yet (peer + // may have connected late). + let poll_interval = Duration::from_secs(1); + let retry_interval = Duration::from_secs(10); + let max_wait = Duration::from_secs(60); + let start = tokio::time::Instant::now(); + let mut last_retry_time = tokio::time::Instant::now(); + + loop { + if start.elapsed() >= max_wait { + let unhealed_count = initial_update_ids + .iter() + .filter(|(ch_id, _, initial_id)| { + chain_monitor + .get_monitor(*ch_id) + .ok() + .map(|m| m.get_latest_update_id() <= *initial_id) + .unwrap_or(false) + }) + .count(); + + if unhealed_count == 0 { + log_info!(heal_logger, "Stale monitor recovery: all monitors healed."); + } else { + log_error!( + heal_logger, + "Stale monitor recovery: timeout reached with {} unhealed channel(s). \ + Proceeding with chain sync anyway.", + unhealed_count + ); + } + break; + } + + let all_healed = initial_update_ids.iter().all(|(ch_id, _, initial_id)| { + chain_monitor + .get_monitor(*ch_id) + .ok() + .map(|m| m.get_latest_update_id() > *initial_id) + .unwrap_or(true) // Channel gone = consider healed + }); + + if all_healed { + log_info!( + heal_logger, + "Stale monitor recovery: all monitors healed in {:.1}s.", + start.elapsed().as_secs_f64() + ); + break; + } + + // Retry healing payments for each unhealed channel (only if chain synced). + if chain_synced && last_retry_time.elapsed() >= retry_interval { + last_retry_time = tokio::time::Instant::now(); + for (ch_id, counterparty_node_id, initial_id) in &initial_update_ids { + let healed = chain_monitor + .get_monitor(*ch_id) + .ok() + .map(|m| m.get_latest_update_id() > *initial_id) + .unwrap_or(true); + if healed { + continue; + } + + if send_heal_payment(*counterparty_node_id).is_ok() { + log_info!( + heal_logger, + "Stale monitor recovery: retried healing payment for channel {}", + ch_id + ); + } + } + } + + tokio::select! { + _ = stop_healing.changed() => { + log_info!(heal_logger, "Stale monitor recovery: cancelled by shutdown."); + break; + } + _ = tokio::time::sleep(poll_interval) => {} + } + } + }); + + // Clear the flag so subsequent start()/stop()/start() cycles don't re-trigger. + self.accept_stale_channel_monitors.store(false, Ordering::Relaxed); + + // Subscribe while holding the is_running read lock to prevent a TOCTOU + // race where stop() completes between our check and the subscribe — which + // would orphan the chain sync task (it would miss the stop signal). + { + let is_running = self.is_running.read().unwrap(); + if *is_running { + let stop_receiver = self.stop_sender.subscribe(); + drop(is_running); + self.spawn_chain_sync_task_with_receiver(stop_receiver); + log_info!(self.logger, "Startup complete."); + } else { + log_info!(self.logger, "Node was stopped during stale monitor recovery."); + } + } + return Ok(()); + } + log_info!(self.logger, "Startup complete."); *is_running_lock = true; Ok(()) } + fn spawn_chain_sync_task(&self) { + let stop_receiver = self.stop_sender.subscribe(); + self.spawn_chain_sync_task_with_receiver(stop_receiver); + } + + fn spawn_chain_sync_task_with_receiver( + &self, stop_sync_receiver: tokio::sync::watch::Receiver<()>, + ) { + let chain_source = Arc::clone(&self.chain_source); + let sync_wallet = Arc::clone(&self.wallet); + let sync_cman = Arc::clone(&self.channel_manager); + let sync_cmon = Arc::clone(&self.chain_monitor); + let sync_sweeper = Arc::clone(&self.output_sweeper); + self.runtime.spawn_background_task(async move { + chain_source + .continuously_sync_wallets( + stop_sync_receiver, + sync_wallet, + sync_cman, + sync_cmon, + sync_sweeper, + ) + .await; + }); + } + /// Disconnects all peers, stops all running background tasks, and shuts down [`Node`]. /// /// After this returns most API methods will return [`Error::NotRunning`].