From 4961ce008897182aaac6da758de6ae7de0aaa23c Mon Sep 17 00:00:00 2001 From: elizabeth Date: Mon, 7 Jul 2025 17:02:39 -0400 Subject: [PATCH 01/38] initial impl with incoming/outgoing message impl --- Cargo.lock | 1200 +++++++++++++++++++++++++++++++++-- Cargo.toml | 3 +- crates/p2p/Cargo.toml | 15 + crates/p2p/src/behaviour.rs | 165 +++++ crates/p2p/src/lib.rs | 299 +++++++++ crates/p2p/src/message.rs | 122 ++++ crates/p2p/src/protocol.rs | 73 +++ 7 files changed, 1821 insertions(+), 56 deletions(-) create mode 100644 crates/p2p/Cargo.toml create mode 100644 crates/p2p/src/behaviour.rs create mode 100644 crates/p2p/src/lib.rs create mode 100644 crates/p2p/src/message.rs create mode 100644 crates/p2p/src/protocol.rs diff --git a/Cargo.lock b/Cargo.lock index 6e8a731e..93983a60 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -226,6 +226,31 @@ dependencies = [ "generic-array", ] +[[package]] +name = "aes" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b169f7a6d4742236a0a00c541b845991d0ac43e546831af1249753ab4c3aa3a0" +dependencies = [ + "cfg-if", + "cipher", + "cpufeatures", +] + +[[package]] +name = "aes-gcm" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "831010a0f742e1209b3bcea8fab6a8e149051ba6099432c8cb2cc117dec3ead1" +dependencies = [ + "aead", + "aes", + "cipher", + "ctr", + "ghash", + "subtle", +] + [[package]] name = "aho-corasick" version = "1.1.3" @@ -1235,6 +1260,12 @@ dependencies = [ "rand 0.8.5", ] +[[package]] +name = "arrayref" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76a2e8124351fda1ef8aaaa3bbd7ebbcb486bbcd4225aca0aa0d84bb2db8fecb" + [[package]] name = "arrayvec" version = "0.7.6" @@ -1293,6 +1324,36 @@ dependencies = [ "serde_json", ] +[[package]] +name = "async-io" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1237c0ae75a0f3765f58910ff9cdd0a12eeb39ab2f4c7de23262f337f0aacbb3" +dependencies = [ + "async-lock", + "cfg-if", + "concurrent-queue", + "futures-io", + "futures-lite 2.6.0", + "parking", + "polling", + "rustix 1.0.7", + "slab", + "tracing", + "windows-sys 0.59.0", +] + +[[package]] +name = "async-lock" +version = "3.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff6e472cdea888a4bd64f342f09b3f50e1886d32afe8df3d663c01140b811b18" +dependencies = [ + "event-listener", + "event-listener-strategy", + "pin-project-lite", +] + [[package]] name = "async-stream" version = "0.3.6" @@ -1337,6 +1398,19 @@ dependencies = [ "rustc_version 0.4.1", ] +[[package]] +name = "asynchronous-codec" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a860072022177f903e59730004fb5dc13db9275b79bb2aef7ba8ce831956c233" +dependencies = [ + "bytes", + "futures-sink", + "futures-util", + "memchr", + "pin-project-lite", +] + [[package]] name = "atomic-waker" version = "1.1.2" @@ -1420,6 +1494,12 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "base-x" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4cbbc9d0964165b47557570cce6c952866c2678457aca742aafc9fb771d30270" + [[package]] name = "base16ct" version = "0.2.0" @@ -1528,6 +1608,15 @@ dependencies = [ "wyz", ] +[[package]] +name = "blake2" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46502ad458c9a52b69d4d4d32775c788b7a1b85e8bc9d482d92250fc0e3f8efe" +dependencies = [ + "digest 0.10.7", +] + [[package]] name = "block-buffer" version = "0.10.4" @@ -1563,7 +1652,7 @@ dependencies = [ "hex", "http 1.3.1", "http-body-util", - "hyper", + "hyper 1.6.0", "hyper-named-pipe", "hyper-util", "hyperlocal", @@ -1620,6 +1709,15 @@ dependencies = [ "alloc-stdlib", ] +[[package]] +name = "bs58" +version = "0.5.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf88ba1141d185c399bee5288d850d63b8369520c1eafc32a0430b5b6c287bf4" +dependencies = [ + "tinyvec", +] + [[package]] name = "bumpalo" version = "3.17.0" @@ -1678,6 +1776,15 @@ dependencies = [ "serde", ] +[[package]] +name = "cbor4ii" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "472931dd4dfcc785075b09be910147f9c6258883fc4591d0dac6116392b2daa6" +dependencies = [ + "serde", +] + [[package]] name = "cc" version = "1.2.20" @@ -1721,6 +1828,19 @@ dependencies = [ "cpufeatures", ] +[[package]] +name = "chacha20poly1305" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10cd79432192d1c0f4e1a0fef9527696cc039165d729fb41b3f4f4f354c2dc35" +dependencies = [ + "aead", + "chacha20", + "cipher", + "poly1305", + "zeroize", +] + [[package]] name = "chrono" version = "0.4.41" @@ -1941,6 +2061,15 @@ version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" +[[package]] +name = "core2" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b49ba7ef1ad6107f8824dbe97de947cbaac53c44e7f9756a1fba0d37c1eec505" +dependencies = [ + "memchr", +] + [[package]] name = "cpufeatures" version = "0.2.17" @@ -2075,6 +2204,15 @@ dependencies = [ "zeroize", ] +[[package]] +name = "ctr" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0369ee1ad671834580515889b80f2ea915f23b8be8d0daa4bbaf2ac5c7590835" +dependencies = [ + "cipher", +] + [[package]] name = "ctrlc" version = "3.4.6" @@ -2169,6 +2307,26 @@ version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2a2330da5de22e8a3cb63252ce2abb30116bf5265e89c0e01bc17015ce30a476" +[[package]] +name = "data-encoding-macro" +version = "0.1.18" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "47ce6c96ea0102f01122a185683611bd5ac8d99e62bc59dd12e6bda344ee673d" +dependencies = [ + "data-encoding", + "data-encoding-macro-internal", +] + +[[package]] +name = "data-encoding-macro-internal" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d162beedaa69905488a8da94f5ac3edb4dd4788b732fadb7bd120b2625c1976" +dependencies = [ + "data-encoding", + "syn 2.0.101", +] + [[package]] name = "der" version = "0.7.10" @@ -2634,6 +2792,27 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "event-listener" +version = "5.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3492acde4c3fc54c845eaab3eed8bd00c7a7d881f78bfc801e43a93dec1331ae" +dependencies = [ + "concurrent-queue", + "parking", + "pin-project-lite", +] + +[[package]] +name = "event-listener-strategy" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8be9f3dfaaffdae2972880079a491a1a8bb7cbed0b8dd7a347f668b4150a3b93" +dependencies = [ + "event-listener", + "pin-project-lite", +] + [[package]] name = "eyre" version = "0.6.12" @@ -2735,7 +2914,7 @@ dependencies = [ "futures-core", "futures-sink", "nanorand", - "spin", + "spin 0.9.8", ] [[package]] @@ -2801,6 +2980,16 @@ dependencies = [ "futures-util", ] +[[package]] +name = "futures-bounded" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91f328e7fb845fc832912fb6a34f40cf6d1888c92f974d1893a54e97b5ff542e" +dependencies = [ + "futures-timer", + "futures-util", +] + [[package]] name = "futures-buffered" version = "0.2.11" @@ -2811,7 +3000,7 @@ dependencies = [ "diatomic-waker", "futures-core", "pin-project-lite", - "spin", + "spin 0.9.8", ] [[package]] @@ -2839,6 +3028,7 @@ dependencies = [ "futures-core", "futures-task", "futures-util", + "num_cpus", ] [[package]] @@ -2886,6 +3076,17 @@ dependencies = [ "syn 
2.0.101", ] +[[package]] +name = "futures-rustls" +version = "0.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f2f12607f92c69b12ed746fabf9ca4f5c482cba46679c1a75b874ed7c26adb" +dependencies = [ + "futures-io", + "rustls", + "rustls-pki-types", +] + [[package]] name = "futures-sink" version = "0.3.31" @@ -2898,6 +3099,12 @@ version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" +[[package]] +name = "futures-timer" +version = "3.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f288b0a4f20f9a56b5d1da57e2227c661b7b16168e2f72365f57b63326e29b24" + [[package]] name = "futures-util" version = "0.3.31" @@ -2973,6 +3180,16 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "ghash" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0d8a4362ccb29cb0b265253fb0a2728f592895ee6854fd9bc13f2ffda266ff1" +dependencies = [ + "opaque-debug", + "polyval", +] + [[package]] name = "gimli" version = "0.31.1" @@ -3072,7 +3289,7 @@ dependencies = [ "regex", "reqwest", "reqwest-middleware", - "ring", + "ring 0.17.14", "serde", "serde_json", "sha2", @@ -3177,6 +3394,12 @@ version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" +[[package]] +name = "hermit-abi" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" + [[package]] name = "hex" version = "0.4.3" @@ -3195,6 +3418,31 @@ dependencies = [ "arrayvec", ] +[[package]] +name = "hickory-proto" +version = "0.24.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92652067c9ce6f66ce53cc38d1169daa36e6e7eb7dd3b63b5103bd9d97117248" +dependencies = [ + "async-trait", + 
"cfg-if", + "data-encoding", + "enum-as-inner", + "futures-channel", + "futures-io", + "futures-util", + "idna 1.0.3", + "ipnet", + "once_cell", + "rand 0.8.5", + "socket2", + "thiserror 1.0.69", + "tinyvec", + "tokio", + "tracing", + "url", +] + [[package]] name = "hickory-proto" version = "0.25.2" @@ -3212,7 +3460,7 @@ dependencies = [ "ipnet", "once_cell", "rand 0.9.1", - "ring", + "ring 0.17.14", "thiserror 2.0.12", "tinyvec", "tokio", @@ -3220,6 +3468,27 @@ dependencies = [ "url", ] +[[package]] +name = "hickory-resolver" +version = "0.24.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cbb117a1ca520e111743ab2f6688eddee69db4e0ea242545a604dce8a66fd22e" +dependencies = [ + "cfg-if", + "futures-util", + "hickory-proto 0.24.4", + "ipconfig", + "lru-cache", + "once_cell", + "parking_lot 0.12.3", + "rand 0.8.5", + "resolv-conf", + "smallvec", + "thiserror 1.0.69", + "tokio", + "tracing", +] + [[package]] name = "hickory-resolver" version = "0.25.2" @@ -3228,7 +3497,7 @@ checksum = "dc62a9a99b0bfb44d2ab95a7208ac952d31060efc16241c87eaf36406fecf87a" dependencies = [ "cfg-if", "futures-util", - "hickory-proto", + "hickory-proto 0.25.2", "ipconfig", "moka", "once_cell", @@ -3241,6 +3510,15 @@ dependencies = [ "tracing", ] +[[package]] +name = "hkdf" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b5f8eb2ad728638ea2c7d47a21db23b7b58a72ed6a38256b8a1849f15fbbdf7" +dependencies = [ + "hmac", +] + [[package]] name = "hmac" version = "0.12.1" @@ -3315,6 +3593,17 @@ dependencies = [ "itoa", ] +[[package]] +name = "http-body" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" +dependencies = [ + "bytes", + "http 0.2.12", + "pin-project-lite", +] + [[package]] name = "http-body" version = "1.0.1" @@ -3334,7 +3623,7 @@ dependencies = [ "bytes", "futures-core", "http 1.3.1", - 
"http-body", + "http-body 1.0.1", "pin-project-lite", ] @@ -3350,6 +3639,30 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" +[[package]] +name = "hyper" +version = "0.14.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41dfc780fdec9373c01bae43289ea34c972e40ee3c9f6b3c8801a35f35586ce7" +dependencies = [ + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "h2 0.3.26", + "http 0.2.12", + "http-body 0.4.6", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "socket2", + "tokio", + "tower-service", + "tracing", + "want", +] + [[package]] name = "hyper" version = "1.6.0" @@ -3361,7 +3674,7 @@ dependencies = [ "futures-util", "h2 0.4.9", "http 1.3.1", - "http-body", + "http-body 1.0.1", "httparse", "httpdate", "itoa", @@ -3378,7 +3691,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "73b7d8abf35697b81a825e386fc151e0d503e8cb5fcb93cc8669c376dfd6f278" dependencies = [ "hex", - "hyper", + "hyper 1.6.0", "hyper-util", "pin-project-lite", "tokio", @@ -3394,7 +3707,7 @@ checksum = "2d191583f3da1305256f22463b9bb0471acad48a4e534a5218b9963e9c1f59b2" dependencies = [ "futures-util", "http 1.3.1", - "hyper", + "hyper 1.6.0", "hyper-util", "rustls", "rustls-pki-types", @@ -3412,7 +3725,7 @@ checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0" dependencies = [ "bytes", "http-body-util", - "hyper", + "hyper 1.6.0", "hyper-util", "native-tls", "tokio", @@ -3430,8 +3743,8 @@ dependencies = [ "futures-channel", "futures-util", "http 1.3.1", - "http-body", - "hyper", + "http-body 1.0.1", + "hyper 1.6.0", "libc", "pin-project-lite", "socket2", @@ -3448,7 +3761,7 @@ checksum = "986c5ce3b994526b3cd75578e62554abd09f0899d6206de48b3e96ab34ccc8c7" dependencies = [ "hex", "http-body-util", - "hyper", + "hyper 1.6.0", "hyper-util", "pin-project-lite", "tokio", @@ 
-3634,12 +3947,64 @@ dependencies = [ "icu_properties", ] +[[package]] +name = "if-addrs" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cabb0019d51a643781ff15c9c8a3e5dedc365c47211270f4e8f82812fedd8f0a" +dependencies = [ + "libc", + "windows-sys 0.48.0", +] + +[[package]] +name = "if-watch" +version = "3.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cdf9d64cfcf380606e64f9a0bcf493616b65331199f984151a6fa11a7b3cde38" +dependencies = [ + "async-io", + "core-foundation", + "fnv", + "futures", + "if-addrs", + "ipnet", + "log", + "netlink-packet-core", + "netlink-packet-route 0.17.1", + "netlink-proto", + "netlink-sys", + "rtnetlink 0.13.1", + "system-configuration", + "tokio", + "windows 0.52.0", +] + [[package]] name = "if_chain" version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cb56e1aa765b4b4f3aadfab769793b7087bb03a4ea4920644a6d238e2df5b9ed" +[[package]] +name = "igd-next" +version = "0.14.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "064d90fec10d541084e7b39ead8875a5a80d9114a2b18791565253bae25f49e4" +dependencies = [ + "async-trait", + "attohttpc", + "bytes", + "futures", + "http 0.2.12", + "hyper 0.14.32", + "log", + "rand 0.8.5", + "tokio", + "url", + "xmltree", +] + [[package]] name = "igd-next" version = "0.15.1" @@ -3652,7 +4017,7 @@ dependencies = [ "futures", "http 1.3.1", "http-body-util", - "hyper", + "hyper 1.6.0", "hyper-util", "log", "rand 0.8.5", @@ -3801,9 +4166,9 @@ dependencies = [ "derive_more 1.0.0", "ed25519-dalek", "futures-util", - "hickory-resolver", + "hickory-resolver 0.25.2", "http 1.3.1", - "igd-next", + "igd-next 0.15.1", "instant", "iroh-base", "iroh-metrics", @@ -3818,9 +4183,9 @@ dependencies = [ "pkarr", "portmapper", "rand 0.8.5", - "rcgen", + "rcgen 0.13.2", "reqwest", - "ring", + "ring 0.17.14", "rustls", "rustls-webpki 0.102.8", "serde", @@ -3900,7 +4265,7 @@ 
dependencies = [ "bytes", "getrandom 0.2.16", "rand 0.8.5", - "ring", + "ring 0.17.14", "rustc-hash 2.1.1", "rustls", "rustls-pki-types", @@ -3936,10 +4301,10 @@ dependencies = [ "cfg_aliases", "data-encoding", "derive_more 1.0.0", - "hickory-resolver", + "hickory-resolver 0.25.2", "http 1.3.1", "http-body-util", - "hyper", + "hyper 1.6.0", "hyper-util", "iroh-base", "iroh-metrics", @@ -4061,7 +4426,7 @@ dependencies = [ "base64 0.22.1", "js-sys", "pem", - "ring", + "ring 0.17.14", "serde", "serde_json", "simple_asn1", @@ -4113,32 +4478,464 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" [[package]] -name = "lazycell" -version = "1.3.0" +name = "lazycell" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" + +[[package]] +name = "libc" +version = "0.2.172" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa" + +[[package]] +name = "libloading" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc2f4eb4bc735547cfed7c0a4922cbd04a4655978c09b54f1f7b228750664c34" +dependencies = [ + "cfg-if", + "windows-targets 0.52.6", +] + +[[package]] +name = "libm" +version = "0.2.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c9627da5196e5d8ed0b0495e61e518847578da83483c37288316d9b2e03a7f72" + +[[package]] +name = "libp2p" +version = "0.54.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbbe80f9c7e00526cd6b838075b9c171919404a4732cb2fa8ece0a093223bfc4" +dependencies = [ + "bytes", + "either", + "futures", + "futures-timer", + "getrandom 0.2.16", + "libp2p-allow-block-list", + "libp2p-autonat", + "libp2p-connection-limits", + "libp2p-core", + "libp2p-dns", + 
"libp2p-identify", + "libp2p-identity", + "libp2p-kad", + "libp2p-mdns", + "libp2p-metrics", + "libp2p-noise", + "libp2p-ping", + "libp2p-quic", + "libp2p-request-response", + "libp2p-swarm", + "libp2p-tcp", + "libp2p-upnp", + "libp2p-yamux", + "multiaddr", + "pin-project", + "rw-stream-sink", + "thiserror 1.0.69", +] + +[[package]] +name = "libp2p-allow-block-list" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1027ccf8d70320ed77e984f273bc8ce952f623762cb9bf2d126df73caef8041" +dependencies = [ + "libp2p-core", + "libp2p-identity", + "libp2p-swarm", + "void", +] + +[[package]] +name = "libp2p-autonat" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a083675f189803d0682a2726131628e808144911dad076858bfbe30b13065499" +dependencies = [ + "async-trait", + "asynchronous-codec", + "bytes", + "either", + "futures", + "futures-bounded", + "futures-timer", + "libp2p-core", + "libp2p-identity", + "libp2p-request-response", + "libp2p-swarm", + "quick-protobuf", + "quick-protobuf-codec", + "rand 0.8.5", + "rand_core 0.6.4", + "thiserror 1.0.69", + "tracing", + "void", + "web-time", +] + +[[package]] +name = "libp2p-connection-limits" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d003540ee8baef0d254f7b6bfd79bac3ddf774662ca0abf69186d517ef82ad8" +dependencies = [ + "libp2p-core", + "libp2p-identity", + "libp2p-swarm", + "void", +] + +[[package]] +name = "libp2p-core" +version = "0.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a61f26c83ed111104cd820fe9bc3aaabbac5f1652a1d213ed6e900b7918a1298" +dependencies = [ + "either", + "fnv", + "futures", + "futures-timer", + "libp2p-identity", + "multiaddr", + "multihash", + "multistream-select", + "once_cell", + "parking_lot 0.12.3", + "pin-project", + "quick-protobuf", + "rand 0.8.5", + "rw-stream-sink", + "smallvec", + "thiserror 1.0.69", + "tracing", 
+ "unsigned-varint 0.8.0", + "void", + "web-time", +] + +[[package]] +name = "libp2p-dns" +version = "0.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97f37f30d5c7275db282ecd86e54f29dd2176bd3ac656f06abf43bedb21eb8bd" +dependencies = [ + "async-trait", + "futures", + "hickory-resolver 0.24.4", + "libp2p-core", + "libp2p-identity", + "parking_lot 0.12.3", + "smallvec", + "tracing", +] + +[[package]] +name = "libp2p-identify" +version = "0.45.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1711b004a273be4f30202778856368683bd9a83c4c7dcc8f848847606831a4e3" +dependencies = [ + "asynchronous-codec", + "either", + "futures", + "futures-bounded", + "futures-timer", + "libp2p-core", + "libp2p-identity", + "libp2p-swarm", + "lru 0.12.5", + "quick-protobuf", + "quick-protobuf-codec", + "smallvec", + "thiserror 1.0.69", + "tracing", + "void", +] + +[[package]] +name = "libp2p-identity" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3104e13b51e4711ff5738caa1fb54467c8604c2e94d607e27745bcf709068774" +dependencies = [ + "bs58", + "ed25519-dalek", + "hkdf", + "multihash", + "quick-protobuf", + "rand 0.8.5", + "sha2", + "thiserror 2.0.12", + "tracing", + "zeroize", +] + +[[package]] +name = "libp2p-kad" +version = "0.46.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ced237d0bd84bbebb7c2cad4c073160dacb4fe40534963c32ed6d4c6bb7702a3" +dependencies = [ + "arrayvec", + "asynchronous-codec", + "bytes", + "either", + "fnv", + "futures", + "futures-bounded", + "futures-timer", + "libp2p-core", + "libp2p-identity", + "libp2p-swarm", + "quick-protobuf", + "quick-protobuf-codec", + "rand 0.8.5", + "sha2", + "smallvec", + "thiserror 1.0.69", + "tracing", + "uint", + "void", + "web-time", +] + +[[package]] +name = "libp2p-mdns" +version = "0.46.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"14b8546b6644032565eb29046b42744aee1e9f261ed99671b2c93fb140dba417" +dependencies = [ + "data-encoding", + "futures", + "hickory-proto 0.24.4", + "if-watch", + "libp2p-core", + "libp2p-identity", + "libp2p-swarm", + "rand 0.8.5", + "smallvec", + "socket2", + "tokio", + "tracing", + "void", +] + +[[package]] +name = "libp2p-metrics" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77ebafa94a717c8442d8db8d3ae5d1c6a15e30f2d347e0cd31d057ca72e42566" +dependencies = [ + "futures", + "libp2p-core", + "libp2p-identify", + "libp2p-identity", + "libp2p-kad", + "libp2p-ping", + "libp2p-swarm", + "pin-project", + "prometheus-client", + "web-time", +] + +[[package]] +name = "libp2p-noise" +version = "0.45.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36b137cb1ae86ee39f8e5d6245a296518912014eaa87427d24e6ff58cfc1b28c" +dependencies = [ + "asynchronous-codec", + "bytes", + "curve25519-dalek", + "futures", + "libp2p-core", + "libp2p-identity", + "multiaddr", + "multihash", + "once_cell", + "quick-protobuf", + "rand 0.8.5", + "sha2", + "snow", + "static_assertions", + "thiserror 1.0.69", + "tracing", + "x25519-dalek", + "zeroize", +] + +[[package]] +name = "libp2p-ping" +version = "0.45.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "005a34420359223b974ee344457095f027e51346e992d1e0dcd35173f4cdd422" +dependencies = [ + "either", + "futures", + "futures-timer", + "libp2p-core", + "libp2p-identity", + "libp2p-swarm", + "rand 0.8.5", + "tracing", + "void", + "web-time", +] + +[[package]] +name = "libp2p-quic" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46352ac5cd040c70e88e7ff8257a2ae2f891a4076abad2c439584a31c15fd24e" +dependencies = [ + "bytes", + "futures", + "futures-timer", + "if-watch", + "libp2p-core", + "libp2p-identity", + "libp2p-tls", + "parking_lot 0.12.3", + "quinn", + "rand 0.8.5", + "ring 0.17.14", + 
"rustls", + "socket2", + "thiserror 1.0.69", + "tokio", + "tracing", +] + +[[package]] +name = "libp2p-request-response" +version = "0.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1356c9e376a94a75ae830c42cdaea3d4fe1290ba409a22c809033d1b7dcab0a6" +dependencies = [ + "async-trait", + "cbor4ii", + "futures", + "futures-bounded", + "futures-timer", + "libp2p-core", + "libp2p-identity", + "libp2p-swarm", + "rand 0.8.5", + "serde", + "smallvec", + "tracing", + "void", + "web-time", +] + +[[package]] +name = "libp2p-swarm" +version = "0.45.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7dd6741793d2c1fb2088f67f82cf07261f25272ebe3c0b0c311e0c6b50e851a" +dependencies = [ + "either", + "fnv", + "futures", + "futures-timer", + "libp2p-core", + "libp2p-identity", + "libp2p-swarm-derive", + "lru 0.12.5", + "multistream-select", + "once_cell", + "rand 0.8.5", + "smallvec", + "tokio", + "tracing", + "void", + "web-time", +] + +[[package]] +name = "libp2p-swarm-derive" +version = "0.35.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "206e0aa0ebe004d778d79fb0966aa0de996c19894e2c0605ba2f8524dd4443d8" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn 2.0.101", +] + +[[package]] +name = "libp2p-tcp" +version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" +checksum = "ad964f312c59dcfcac840acd8c555de8403e295d39edf96f5240048b5fcaa314" +dependencies = [ + "futures", + "futures-timer", + "if-watch", + "libc", + "libp2p-core", + "libp2p-identity", + "socket2", + "tokio", + "tracing", +] [[package]] -name = "libc" -version = "0.2.172" +name = "libp2p-tls" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa" +checksum = 
"47b23dddc2b9c355f73c1e36eb0c3ae86f7dc964a3715f0731cfad352db4d847" +dependencies = [ + "futures", + "futures-rustls", + "libp2p-core", + "libp2p-identity", + "rcgen 0.11.3", + "ring 0.17.14", + "rustls", + "rustls-webpki 0.101.7", + "thiserror 1.0.69", + "x509-parser", + "yasna", +] [[package]] -name = "libloading" -version = "0.8.6" +name = "libp2p-upnp" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc2f4eb4bc735547cfed7c0a4922cbd04a4655978c09b54f1f7b228750664c34" +checksum = "01bf2d1b772bd3abca049214a3304615e6a36fa6ffc742bdd1ba774486200b8f" dependencies = [ - "cfg-if", - "windows-targets 0.52.6", + "futures", + "futures-timer", + "igd-next 0.14.3", + "libp2p-core", + "libp2p-swarm", + "tokio", + "tracing", + "void", ] [[package]] -name = "libm" -version = "0.2.13" +name = "libp2p-yamux" +version = "0.46.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c9627da5196e5d8ed0b0495e61e518847578da83483c37288316d9b2e03a7f72" +checksum = "788b61c80789dba9760d8c669a5bedb642c8267555c803fabd8396e4ca5c5882" +dependencies = [ + "either", + "futures", + "libp2p-core", + "thiserror 1.0.69", + "tracing", + "yamux 0.12.1", + "yamux 0.13.5", +] [[package]] name = "libredox" @@ -4159,12 +4956,24 @@ dependencies = [ "zlib-rs", ] +[[package]] +name = "linked-hash-map" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f" + [[package]] name = "linux-raw-sys" version = "0.4.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" +[[package]] +name = "linux-raw-sys" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12" + [[package]] name = "litemap" version = "0.7.5" @@ -4251,6 +5060,15 @@ dependencies 
= [ "hashbrown 0.15.2", ] +[[package]] +name = "lru-cache" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "31e24f1ad8321ca0e8a1e0ac13f23cb668e6f5466c2c57319f6a5cf1cc8e3b1c" +dependencies = [ + "linked-hash-map", +] + [[package]] name = "mach" version = "0.3.2" @@ -4375,9 +5193,9 @@ dependencies = [ "colored", "futures-util", "http 1.3.1", - "http-body", + "http-body 1.0.1", "http-body-util", - "hyper", + "hyper 1.6.0", "hyper-util", "log", "rand 0.9.1", @@ -4407,6 +5225,60 @@ dependencies = [ "uuid", ] +[[package]] +name = "multiaddr" +version = "0.18.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe6351f60b488e04c1d21bc69e56b89cb3f5e8f5d22557d6e8031bdfd79b6961" +dependencies = [ + "arrayref", + "byteorder", + "data-encoding", + "libp2p-identity", + "multibase", + "multihash", + "percent-encoding", + "serde", + "static_assertions", + "unsigned-varint 0.8.0", + "url", +] + +[[package]] +name = "multibase" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b3539ec3c1f04ac9748a260728e855f261b4977f5c3406612c884564f329404" +dependencies = [ + "base-x", + "data-encoding", + "data-encoding-macro", +] + +[[package]] +name = "multihash" +version = "0.19.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b430e7953c29dd6a09afc29ff0bb69c6e306329ee6794700aee27b76a1aea8d" +dependencies = [ + "core2", + "unsigned-varint 0.8.0", +] + +[[package]] +name = "multistream-select" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea0df8e5eec2298a62b326ee4f0d7fe1a6b90a09dfcf9df37b38f947a8c42f19" +dependencies = [ + "bytes", + "futures", + "log", + "pin-project", + "smallvec", + "unsigned-varint 0.7.2", +] + [[package]] name = "n0-future" version = "0.1.3" @@ -4651,6 +5523,12 @@ version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"43794a0ace135be66a25d3ae77d41b91615fb68ae937f904090203e81f755b65" +[[package]] +name = "nohash-hasher" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2bf50223579dc7cdcfb3bfcacf7069ff68243f8c363f62ffa99cf000a6b9c451" + [[package]] name = "nom" version = "7.1.3" @@ -4741,7 +5619,7 @@ version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" dependencies = [ - "hermit-abi", + "hermit-abi 0.3.9", "libc", ] @@ -4934,6 +5812,17 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" +[[package]] +name = "p2p" +version = "0.3.10" +dependencies = [ + "anyhow", + "libp2p", + "serde", + "tokio", + "void", +] + [[package]] name = "parity-scale-codec" version = "3.7.4" @@ -5216,6 +6105,21 @@ dependencies = [ "pnet_macros_support", ] +[[package]] +name = "polling" +version = "3.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b53a684391ad002dd6a596ceb6c74fd004fdce75f4be2e3f615068abbea5fd50" +dependencies = [ + "cfg-if", + "concurrent-queue", + "hermit-abi 0.5.2", + "pin-project-lite", + "rustix 1.0.7", + "tracing", + "windows-sys 0.59.0", +] + [[package]] name = "poly1305" version = "0.8.0" @@ -5227,6 +6131,18 @@ dependencies = [ "universal-hash", ] +[[package]] +name = "polyval" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d1fe60d06143b2430aa532c94cfe9e29783047f06c0d7fd359a9a51b729fa25" +dependencies = [ + "cfg-if", + "cpufeatures", + "opaque-debug", + "universal-hash", +] + [[package]] name = "portable-atomic" version = "1.11.0" @@ -5253,7 +6169,7 @@ dependencies = [ "derive_more 1.0.0", "futures-lite 2.6.0", "futures-util", - "igd-next", + "igd-next 0.15.1", "iroh-metrics", "libc", "netwatch", @@ -5574,6 +6490,28 @@ version = 
"1.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" +[[package]] +name = "quick-protobuf" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d6da84cc204722a989e01ba2f6e1e276e190f22263d0cb6ce8526fcdb0d2e1f" +dependencies = [ + "byteorder", +] + +[[package]] +name = "quick-protobuf-codec" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15a0580ab32b169745d7a39db2ba969226ca16738931be152a3209b409de2474" +dependencies = [ + "asynchronous-codec", + "bytes", + "quick-protobuf", + "thiserror 1.0.69", + "unsigned-varint 0.8.0", +] + [[package]] name = "quinn" version = "0.11.7" @@ -5582,6 +6520,7 @@ checksum = "c3bd15a6f2967aef83887dcb9fec0014580467e33720d073560cf015a5683012" dependencies = [ "bytes", "cfg_aliases", + "futures-io", "pin-project-lite", "quinn-proto", "quinn-udp", @@ -5603,7 +6542,7 @@ dependencies = [ "bytes", "getrandom 0.3.2", "rand 0.9.1", - "ring", + "ring 0.17.14", "rustc-hash 2.1.1", "rustls", "rustls-pki-types", @@ -5765,6 +6704,18 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "rcgen" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52c4f3084aa3bc7dfbba4eff4fab2a54db4324965d8872ab933565e6fbd83bc6" +dependencies = [ + "pem", + "ring 0.16.20", + "time", + "yasna", +] + [[package]] name = "rcgen" version = "0.13.2" @@ -5772,7 +6723,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "75e669e5202259b5314d1ea5397316ad400819437857b90861765f24c4cf80a2" dependencies = [ "pem", - "ring", + "ring 0.17.14", "rustls-pki-types", "time", "yasna", @@ -5911,9 +6862,9 @@ dependencies = [ "futures-util", "h2 0.4.9", "http 1.3.1", - "http-body", + "http-body 1.0.1", "http-body-util", - "hyper", + "hyper 1.6.0", "hyper-rustls", "hyper-tls", "hyper-util", @@ -5981,6 +6932,21 @@ 
dependencies = [ "subtle", ] +[[package]] +name = "ring" +version = "0.16.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3053cf52e236a3ed746dfc745aa9cacf1b791d846bdaf412f60a8d7d6e17c8fc" +dependencies = [ + "cc", + "libc", + "once_cell", + "spin 0.5.2", + "untrusted 0.7.1", + "web-sys", + "winapi", +] + [[package]] name = "ring" version = "0.17.14" @@ -5991,7 +6957,7 @@ dependencies = [ "cfg-if", "getrandom 0.2.16", "libc", - "untrusted", + "untrusted 0.9.0", "windows-sys 0.52.0", ] @@ -6168,7 +7134,20 @@ dependencies = [ "bitflags 2.9.0", "errno", "libc", - "linux-raw-sys", + "linux-raw-sys 0.4.15", + "windows-sys 0.59.0", +] + +[[package]] +name = "rustix" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c71e83d6afe7ff64890ec6b71d6a69bb8a610ab78ce364b3352876bb4c801266" +dependencies = [ + "bitflags 2.9.0", + "errno", + "libc", + "linux-raw-sys 0.9.4", "windows-sys 0.59.0", ] @@ -6181,7 +7160,7 @@ dependencies = [ "aws-lc-rs", "log", "once_cell", - "ring", + "ring 0.17.14", "rustls-pki-types", "rustls-webpki 0.103.1", "subtle", @@ -6206,15 +7185,25 @@ dependencies = [ "web-time", ] +[[package]] +name = "rustls-webpki" +version = "0.101.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765" +dependencies = [ + "ring 0.17.14", + "untrusted 0.9.0", +] + [[package]] name = "rustls-webpki" version = "0.102.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "64ca1bc8749bd4cf37b5ce386cc146580777b4e8572c7b97baf22c83f444bee9" dependencies = [ - "ring", + "ring 0.17.14", "rustls-pki-types", - "untrusted", + "untrusted 0.9.0", ] [[package]] @@ -6224,9 +7213,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fef8b8769aaccf73098557a87cd1816b4f9c7c16811c9c77142aa695c16f2c03" dependencies = [ "aws-lc-rs", - "ring", + "ring 0.17.14", 
"rustls-pki-types", - "untrusted", + "untrusted 0.9.0", ] [[package]] @@ -6247,6 +7236,17 @@ dependencies = [ "wait-timeout", ] +[[package]] +name = "rw-stream-sink" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8c9026ff5d2f23da5e45bbc283f156383001bfb09c4e44256d02c1a685fe9a1" +dependencies = [ + "futures", + "pin-project", + "static_assertions", +] + [[package]] name = "ryu" version = "1.0.20" @@ -6705,6 +7705,23 @@ version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" +[[package]] +name = "snow" +version = "0.9.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "850948bee068e713b8ab860fe1adc4d109676ab4c3b621fd8147f06b261f2f85" +dependencies = [ + "aes-gcm", + "blake2", + "chacha20poly1305", + "curve25519-dalek", + "rand_core 0.6.4", + "ring 0.17.14", + "rustc_version 0.4.1", + "sha2", + "subtle", +] + [[package]] name = "socket2" version = "0.5.9" @@ -6715,6 +7732,12 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "spin" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" + [[package]] name = "spin" version = "0.9.8" @@ -6845,7 +7868,7 @@ dependencies = [ "lazy_static", "md-5", "rand 0.8.5", - "ring", + "ring 0.17.14", "subtle", "thiserror 1.0.69", "tokio", @@ -7010,7 +8033,7 @@ dependencies = [ "cfg-if", "fastrand 2.3.0", "once_cell", - "rustix", + "rustix 0.38.44", "windows-sys 0.59.0", ] @@ -7565,6 +8588,24 @@ dependencies = [ "subtle", ] +[[package]] +name = "unsigned-varint" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6889a77d49f1f013504cec6bf97a2c730394adedaeb1deb5ea08949a50541105" + +[[package]] +name = "unsigned-varint" +version = "0.8.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb066959b24b5196ae73cb057f45598450d2c5f71460e98c49b738086eff9c06" + +[[package]] +name = "untrusted" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a" + [[package]] name = "untrusted" version = "0.9.0" @@ -7784,6 +8825,12 @@ version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" +[[package]] +name = "void" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d" + [[package]] name = "vte" version = "0.14.1" @@ -8005,7 +9052,7 @@ dependencies = [ "either", "home", "once_cell", - "rustix", + "rustix 0.38.44", ] [[package]] @@ -8670,6 +9717,18 @@ dependencies = [ "tap", ] +[[package]] +name = "x25519-dalek" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7e468321c81fb07fa7f4c636c3972b9100f0346e5b6a9f2bd0603a52f7ed277" +dependencies = [ + "curve25519-dalek", + "rand_core 0.6.4", + "serde", + "zeroize", +] + [[package]] name = "x509-parser" version = "0.16.0" @@ -8702,6 +9761,37 @@ dependencies = [ "xml-rs", ] +[[package]] +name = "yamux" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed0164ae619f2dc144909a9f082187ebb5893693d8c0196e8085283ccd4b776" +dependencies = [ + "futures", + "log", + "nohash-hasher", + "parking_lot 0.12.3", + "pin-project", + "rand 0.8.5", + "static_assertions", +] + +[[package]] +name = "yamux" +version = "0.13.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3da1acad1c2dc53f0dde419115a38bd8221d8c3e47ae9aeceaf453266d29307e" +dependencies = [ + "futures", + "log", + "nohash-hasher", + "parking_lot 0.12.3", + "pin-project", + 
"rand 0.9.1", + "static_assertions", + "web-time", +] + [[package]] name = "yasna" version = "0.5.2" diff --git a/Cargo.toml b/Cargo.toml index 878eec1c..7bc5fc2b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,6 +5,7 @@ members = [ "crates/validator", "crates/shared", "crates/orchestrator", + "crates/p2p", "crates/dev-utils", ] resolver = "2" @@ -48,7 +49,7 @@ edition = "2021" match_same_arms = "warn" unused_async = "warn" uninlined_format_args = "warn" +manual_let_else = "warn" [workspace.lints.rust] unreachable_pub = "warn" -manual_let_else = "warn" \ No newline at end of file diff --git a/crates/p2p/Cargo.toml b/crates/p2p/Cargo.toml new file mode 100644 index 00000000..2d5d94ff --- /dev/null +++ b/crates/p2p/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "p2p" +version.workspace = true +edition.workspace = true + +[dependencies] +libp2p = { version = "0.54", features = ["request-response", "identify", "ping", "mdns", "noise", "tcp", "autonat", "kad", "tokio", "cbor", "macros", "yamux"] } +void = "1.0" + +anyhow = {workspace = true} +serde = {workspace = true} +tokio = {workspace = true, features = ["sync"]} + +[lints] +workspace = true diff --git a/crates/p2p/src/behaviour.rs b/crates/p2p/src/behaviour.rs new file mode 100644 index 00000000..6b66394e --- /dev/null +++ b/crates/p2p/src/behaviour.rs @@ -0,0 +1,165 @@ +use anyhow::Context as _; +use anyhow::Result; +use libp2p::autonat; +use libp2p::connection_limits; +use libp2p::connection_limits::ConnectionLimits; +use libp2p::identify; +use libp2p::identity; +use libp2p::kad; +use libp2p::kad::store::MemoryStore; +use libp2p::mdns; +use libp2p::ping; +use libp2p::request_response; +use libp2p::swarm::NetworkBehaviour; +use std::time::Duration; + +use crate::message::IncomingMessage; +use crate::message::{Request, Response}; +use crate::Protocols; +use crate::PRIME_STREAM_PROTOCOL; + +#[derive(NetworkBehaviour)] +#[behaviour(to_swarm = "BehaviourEvent")] +pub(crate) struct Behaviour { + // connection gating + 
connection_limits: connection_limits::Behaviour, + + // discovery + mdns: mdns::tokio::Behaviour, + kademlia: kad::Behaviour, + + // protocols + identify: identify::Behaviour, + ping: ping::Behaviour, + request_response: request_response::cbor::Behaviour, + + // nat traversal + autonat: autonat::Behaviour, +} + +#[derive(Debug)] +pub(crate) enum BehaviourEvent { + Autonat(autonat::Event), + Identify(identify::Event), + Kademlia(kad::Event), + Mdns(mdns::Event), + Ping(ping::Event), + RequestResponse(request_response::Event), +} + +impl From for BehaviourEvent { + fn from(_: void::Void) -> Self { + unreachable!("void::Void cannot be converted to BehaviourEvent") + } +} + +impl From for BehaviourEvent { + fn from(event: autonat::Event) -> Self { + BehaviourEvent::Autonat(event) + } +} + +impl From for BehaviourEvent { + fn from(event: kad::Event) -> Self { + BehaviourEvent::Kademlia(event) + } +} + +impl From for BehaviourEvent { + fn from(event: libp2p::mdns::Event) -> Self { + BehaviourEvent::Mdns(event) + } +} + +impl From for BehaviourEvent { + fn from(event: ping::Event) -> Self { + BehaviourEvent::Ping(event) + } +} + +impl From for BehaviourEvent { + fn from(event: identify::Event) -> Self { + BehaviourEvent::Identify(event) + } +} + +impl From> for BehaviourEvent { + fn from(event: request_response::Event) -> Self { + BehaviourEvent::RequestResponse(event) + } +} + +impl Behaviour { + pub(crate) fn new( + keypair: &identity::Keypair, + protocols: Protocols, + agent_version: String, + ) -> Result { + let peer_id = keypair.public().to_peer_id(); + + let protocols = protocols.into_iter().map(|protocol| { + ( + protocol.as_stream_protocol(), + request_response::ProtocolSupport::Full, // TODO: configure inbound/outbound based on node role and protocol + ) + }); + + let autonat = autonat::Behaviour::new(peer_id, autonat::Config::default()); + let connection_limits = connection_limits::Behaviour::new( + ConnectionLimits::default().with_max_established(Some(100)), + 
); + + let mdns = mdns::tokio::Behaviour::new(mdns::Config::default(), peer_id) + .context("failed to create mDNS behaviour")?; + let kademlia = kad::Behaviour::new(peer_id, MemoryStore::new(peer_id)); + + let identify = identify::Behaviour::new( + identify::Config::new(PRIME_STREAM_PROTOCOL.to_string(), keypair.public()) + .with_agent_version(agent_version), + ); + let ping = ping::Behaviour::new(ping::Config::new().with_interval(Duration::from_secs(10))); + + Ok(Self { + autonat, + connection_limits, + kademlia, + mdns, + identify, + ping, + request_response: request_response::cbor::Behaviour::new( + protocols, + request_response::Config::default(), + ), + }) + } + + pub(crate) fn request_response( + &mut self, + ) -> &mut request_response::cbor::Behaviour { + &mut self.request_response + } +} + +impl BehaviourEvent { + pub(crate) async fn handle(self, message_tx: tokio::sync::mpsc::Sender) { + match self { + BehaviourEvent::Autonat(_event) => {} + BehaviourEvent::Identify(_event) => {} + BehaviourEvent::Kademlia(_event) => { // TODO: potentially on outbound queries + } + BehaviourEvent::Mdns(_event) => {} + BehaviourEvent::Ping(_event) => {} + BehaviourEvent::RequestResponse(event) => match event { + request_response::Event::Message { peer, message } => { + let _ = message_tx + .send(IncomingMessage { + peer: peer.clone(), + message, + }) + .await; + } + _ => {} + }, + } + } +} diff --git a/crates/p2p/src/lib.rs b/crates/p2p/src/lib.rs new file mode 100644 index 00000000..adde16af --- /dev/null +++ b/crates/p2p/src/lib.rs @@ -0,0 +1,299 @@ +use anyhow::Context; +use anyhow::Result; +use libp2p::futures::stream::FuturesUnordered; +use libp2p::multiaddr::Protocol; +use libp2p::noise; +use libp2p::swarm::SwarmEvent; +use libp2p::tcp; +use libp2p::yamux; +use libp2p::Multiaddr; +use libp2p::Swarm; +use libp2p::SwarmBuilder; +use libp2p::{identity, PeerId, Transport}; +use std::time::Duration; + +mod behaviour; +mod message; +mod protocol; + +use 
behaviour::Behaviour; +use message::{IncomingMessage, OutgoingMessage, OutgoingMessageInner}; +use protocol::Protocols; + +pub const PRIME_STREAM_PROTOCOL: libp2p::StreamProtocol = + libp2p::StreamProtocol::new("/prime/1.0.0"); +// TODO: force this to be passed by the user +pub const DEFAULT_AGENT_VERSION: &str = "prime-node/0.1.0"; + +pub struct Node { + peer_id: PeerId, + listen_addrs: Vec, + swarm: Swarm, + bootnodes: Vec, + + // channel for sending incoming messages to the consumer of this library + incoming_message_tx: tokio::sync::mpsc::Sender, + + // channel for receiving outgoing messages from the consumer of this library + outgoing_message_rx: tokio::sync::mpsc::Receiver, +} + +impl Node { + pub fn peer_id(&self) -> PeerId { + self.peer_id + } + + pub fn listen_addrs(&self) -> &[libp2p::Multiaddr] { + &self.listen_addrs + } + + /// Returns the multiaddresses that this node is listening on, with the peer ID included. + pub fn multiaddrs(&self) -> Vec { + self.listen_addrs + .iter() + .map(|addr| addr.clone().with(Protocol::P2p(self.peer_id))) + .collect() + } + + pub async fn run(self) -> Result<()> { + use libp2p::futures::StreamExt as _; + + let Node { + peer_id: _, + listen_addrs, + mut swarm, + bootnodes, + incoming_message_tx, + mut outgoing_message_rx, + } = self; + + for addr in listen_addrs { + swarm + .listen_on(addr) + .context("swarm failed to listen on multiaddr")?; + } + + let futures = FuturesUnordered::new(); + for bootnode in bootnodes { + futures.push(swarm.dial(bootnode)) + } + let results: Vec<_> = futures.into_iter().collect(); + for result in results { + match result { + Ok(_) => {} + Err(_e) => { + // TODO: log this error + } + } + } + + loop { + tokio::select! 
{ + Some(message) = outgoing_message_rx.recv() => { + match message.message { + OutgoingMessageInner::Request(request) => { + swarm.behaviour_mut().request_response().send_request(&message.peer, request); + } + OutgoingMessageInner::Response((channel, response)) => { + if let Err(_e) = swarm.behaviour_mut().request_response().send_response(channel, response) { + // log error + } + } + } + } + event = swarm.select_next_some() => { + match event { + SwarmEvent::NewListenAddr { + listener_id: _, + address: _, + } => {} + SwarmEvent::ExternalAddrConfirmed { address: _ } => {} + SwarmEvent::Behaviour(event) => event.handle(incoming_message_tx.clone()).await, + _ => continue, + } + }, + } + } + } +} + +pub struct NodeBuilder { + port: Option, + listen_addrs: Vec, + keypair: Option, + agent_version: Option, + protocols: Protocols, + bootnodes: Vec, +} + +impl NodeBuilder { + pub fn new() -> Self { + Self { + port: None, + listen_addrs: Vec::new(), + keypair: None, + agent_version: None, + protocols: Protocols::new(), + bootnodes: Vec::new(), + } + } + + pub fn with_port(mut self, port: u16) -> Self { + self.port = Some(port); + self + } + + pub fn with_listen_addr(mut self, addr: libp2p::Multiaddr) -> Self { + self.listen_addrs.push(addr); + self + } + + pub fn with_keypair(mut self, keypair: identity::Keypair) -> Self { + self.keypair = Some(keypair); + self + } + + pub fn with_agent_version(mut self, agent_version: String) -> Self { + self.agent_version = Some(agent_version); + self + } + + pub fn with_validator_authentication(mut self) -> Self { + self.protocols = self.protocols.with_validator_authentication(); + self + } + + pub fn with_hardware_challenge(mut self) -> Self { + self.protocols = self.protocols.with_hardware_challenge(); + self + } + + pub fn with_invite(mut self) -> Self { + self.protocols = self.protocols.with_invite(); + self + } + + pub fn with_get_task_logs(mut self) -> Self { + self.protocols = self.protocols.with_get_task_logs(); + self + } + + 
pub fn with_restart(mut self) -> Self { + self.protocols = self.protocols.with_restart(); + self + } + + pub fn with_bootnode(mut self, bootnode: Multiaddr) -> Self { + self.bootnodes.push(bootnode); + self + } + + pub fn with_bootnodes(mut self, bootnodes: I) -> Self + where + I: IntoIterator, + T: Into, + { + for bootnode in bootnodes { + self.bootnodes.push(bootnode.into()); + } + self + } + + pub fn try_build( + self, + ) -> Result<( + Node, + tokio::sync::mpsc::Receiver, + tokio::sync::mpsc::Sender, + )> { + let Self { + port, + mut listen_addrs, + keypair, + agent_version, + protocols, + bootnodes, + } = self; + + let keypair = keypair.unwrap_or(identity::Keypair::generate_ed25519()); + let peer_id = keypair.public().to_peer_id(); + + let transport = create_transport(&keypair)?; + let behaviour = Behaviour::new( + &keypair, + protocols, + agent_version.unwrap_or(DEFAULT_AGENT_VERSION.to_string()), + ) + .context("failed to create behaviour")?; + + let swarm = SwarmBuilder::with_existing_identity(keypair) + .with_tokio() + .with_other_transport(|_| transport)? + .with_behaviour(|_| behaviour)? + .build(); + + if listen_addrs.is_empty() { + let port = port.unwrap_or(0); + let listen_addr = format!("/ip4/0.0.0.0/tcp/{port}") + .parse() + .expect("can parse valid multiaddr"); + listen_addrs.push(listen_addr); + } + + let (incoming_message_tx, incoming_message_rx) = tokio::sync::mpsc::channel(100); + let (outgoing_message_tx, outgoing_message_rx) = tokio::sync::mpsc::channel(100); + + Ok(( + Node { + peer_id, + swarm, + listen_addrs, + bootnodes, + incoming_message_tx, + outgoing_message_rx, + }, + incoming_message_rx, + outgoing_message_tx, + )) + } +} + +fn create_transport( + keypair: &identity::Keypair, +) -> Result> { + let transport = tcp::tokio::Transport::new(tcp::Config::default()) + .upgrade(libp2p::core::upgrade::Version::V1) + .authenticate(noise::Config::new(keypair)?) 
+ .multiplex(yamux::Config::default()) + .timeout(Duration::from_secs(20)) + .boxed(); + + Ok(transport) +} + +#[cfg(test)] +mod test { + use super::NodeBuilder; + use crate::message; + + #[tokio::test] + async fn two_nodes_can_connect() -> anyhow::Result<()> { + let node1 = NodeBuilder::new().with_get_task_logs().try_build().unwrap(); + let (node1, mut incoming_message_rx1, outgoing_message_tx1) = node1; + + let node2 = NodeBuilder::new() + .with_get_task_logs() + .with_bootnodes(node1.multiaddrs()) + .try_build() + .unwrap(); + let (node2, mut incoming_message_rx2, outgoing_message_tx2) = node2; + + // Start both nodes in separate tasks + tokio::spawn(async move { node1.run().await }); + tokio::spawn(async move { node2.run().await }); + + let request = message::Request::GetTaskLogs; + + Ok(()) + } +} diff --git a/crates/p2p/src/message.rs b/crates/p2p/src/message.rs new file mode 100644 index 00000000..99b740db --- /dev/null +++ b/crates/p2p/src/message.rs @@ -0,0 +1,122 @@ +use libp2p::PeerId; +use serde::{Deserialize, Serialize}; +use std::time::SystemTime; + +#[derive(Debug)] +pub struct IncomingMessage { + pub peer: PeerId, + pub message: libp2p::request_response::Message, +} + +#[derive(Debug)] +pub struct OutgoingMessage { + pub peer: PeerId, + pub message: OutgoingMessageInner, +} + +#[derive(Debug)] +pub enum OutgoingMessageInner { + Request(Request), + Response( + ( + libp2p::request_response::ResponseChannel, + Response, + ), + ), +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub enum Request { + ValidatorAuthentication(ValidatorAuthenticationRequest), + HardwareChallenge(HardwareChallengeRequest), + Invite(InviteRequest), + GetTaskLogs, + Restart, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub enum Response { + ValidatorAuthentication(ValidatorAuthenticationResponse), + HardwareChallenge(HardwareChallengeResponse), + Invite(InviteResponse), + GetTaskLogs(GetTaskLogsResponse), + Restart(RestartResponse), +} + +#[derive(Debug, Clone, 
Serialize, Deserialize)] +pub enum ValidatorAuthenticationRequest { + Initiation(ValidationAuthenticationInitiationRequest), + Solution(ValidationAuthenticationSolutionRequest), +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub enum ValidatorAuthenticationResponse { + Initiation(ValidationAuthenticationInitiationResponse), + Solution(ValidationAuthenticationSolutionResponse), +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ValidationAuthenticationInitiationRequest { + pub message: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ValidationAuthenticationInitiationResponse { + pub signed_message: String, + pub message: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ValidationAuthenticationSolutionRequest { + pub signed_message: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub enum ValidationAuthenticationSolutionResponse { + Granted, + Rejected, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct HardwareChallengeRequest { + pub challenge: String, // TODO + pub timestamp: SystemTime, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct HardwareChallengeResponse { + pub response: String, // TODO + pub timestamp: SystemTime, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub enum InviteRequestUrl { + MasterUrl(String), + MasterIpPort(String, u16), +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct InviteRequest { + pub invite: String, + pub pool_id: u32, + pub url: InviteRequestUrl, + pub timestamp: u64, + pub expiration: [u8; 32], + pub nonce: [u8; 32], +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub enum InviteResponse { + Ok, + Error(String), +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct GetTaskLogsResponse { + pub logs: Result, String>, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct RestartResponse { + pub result: Result<(), String>, +} diff --git 
a/crates/p2p/src/protocol.rs b/crates/p2p/src/protocol.rs new file mode 100644 index 00000000..5186ac44 --- /dev/null +++ b/crates/p2p/src/protocol.rs @@ -0,0 +1,73 @@ +use libp2p::StreamProtocol; +use std::{collections::HashSet, hash::Hash}; + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub(crate) enum Protocol { + // validator -> worker + ValidatorAuthentication, + // validator -> worker + HardwareChallenge, + // orchestrator -> worker + Invite, + // any -> worker + GetTaskLogs, + // any -> worker + Restart, +} + +impl Protocol { + pub(crate) fn as_stream_protocol(&self) -> StreamProtocol { + match self { + Protocol::ValidatorAuthentication => { + StreamProtocol::new("/prime/validator_authentication/1.0.0") + } + Protocol::HardwareChallenge => StreamProtocol::new("/prime/hardware_challenge/1.0.0"), + Protocol::Invite => StreamProtocol::new("/prime/invite/1.0.0"), + Protocol::GetTaskLogs => StreamProtocol::new("/prime/get_task_logs/1.0.0"), + Protocol::Restart => StreamProtocol::new("/prime/restart/1.0.0"), + } + } +} + +#[derive(Debug, Clone)] +pub(crate) struct Protocols(HashSet); + +impl Protocols { + pub(crate) fn new() -> Self { + Self(HashSet::new()) + } + + pub(crate) fn with_validator_authentication(mut self) -> Self { + self.0.insert(Protocol::ValidatorAuthentication); + self + } + + pub(crate) fn with_hardware_challenge(mut self) -> Self { + self.0.insert(Protocol::HardwareChallenge); + self + } + + pub(crate) fn with_invite(mut self) -> Self { + self.0.insert(Protocol::Invite); + self + } + + pub(crate) fn with_get_task_logs(mut self) -> Self { + self.0.insert(Protocol::GetTaskLogs); + self + } + + pub(crate) fn with_restart(mut self) -> Self { + self.0.insert(Protocol::Restart); + self + } +} + +impl IntoIterator for Protocols { + type Item = Protocol; + type IntoIter = std::collections::hash_set::IntoIter; + + fn into_iter(self) -> Self::IntoIter { + self.0.into_iter() + } +} From f5365d9762b69e103e55f60b7f304ef52418c1d3 Mon Sep 17 00:00:00 2001 
From: elizabeth Date: Mon, 7 Jul 2025 17:49:08 -0400 Subject: [PATCH 02/38] request-response protocol working --- crates/p2p/src/behaviour.rs | 23 +++++++- crates/p2p/src/lib.rs | 102 +++++++++++++++++++++++++++++------- crates/p2p/src/message.rs | 25 ++++++--- 3 files changed, 121 insertions(+), 29 deletions(-) diff --git a/crates/p2p/src/behaviour.rs b/crates/p2p/src/behaviour.rs index 6b66394e..cd6606bb 100644 --- a/crates/p2p/src/behaviour.rs +++ b/crates/p2p/src/behaviour.rs @@ -151,6 +151,7 @@ impl BehaviourEvent { BehaviourEvent::Ping(_event) => {} BehaviourEvent::RequestResponse(event) => match event { request_response::Event::Message { peer, message } => { + println!("received message from peer {peer:?}: {message:?}"); let _ = message_tx .send(IncomingMessage { peer: peer.clone(), @@ -158,7 +159,27 @@ impl BehaviourEvent { }) .await; } - _ => {} + request_response::Event::ResponseSent { peer, request_id } => { + println!("response sent to peer {peer:?} for request ID {request_id:?}"); + } + request_response::Event::InboundFailure { + peer, + request_id, + error, + } => { + println!( + "inbound failure from peer {peer:?} for request ID {request_id:?}: {error}" + ); + } + request_response::Event::OutboundFailure { + peer, + request_id, + error, + } => { + println!( + "outbound failure to peer {peer:?} for request ID {request_id:?}: {error}" + ); + } }, } } diff --git a/crates/p2p/src/lib.rs b/crates/p2p/src/lib.rs index adde16af..777e8689 100644 --- a/crates/p2p/src/lib.rs +++ b/crates/p2p/src/lib.rs @@ -1,7 +1,6 @@ use anyhow::Context; use anyhow::Result; use libp2p::futures::stream::FuturesUnordered; -use libp2p::multiaddr::Protocol; use libp2p::noise; use libp2p::swarm::SwarmEvent; use libp2p::tcp; @@ -17,7 +16,7 @@ mod message; mod protocol; use behaviour::Behaviour; -use message::{IncomingMessage, OutgoingMessage, OutgoingMessageInner}; +use message::{IncomingMessage, OutgoingMessage}; use protocol::Protocols; pub const PRIME_STREAM_PROTOCOL: 
libp2p::StreamProtocol = @@ -51,7 +50,11 @@ impl Node { pub fn multiaddrs(&self) -> Vec { self.listen_addrs .iter() - .map(|addr| addr.clone().with(Protocol::P2p(self.peer_id))) + .map(|addr| { + addr.clone() + .with_p2p(self.peer_id) + .expect("can add peer ID to multiaddr") + }) .collect() } @@ -81,8 +84,9 @@ impl Node { for result in results { match result { Ok(_) => {} - Err(_e) => { + Err(e) => { // TODO: log this error + println!("failed to dial bootnode: {e:?}"); } } } @@ -90,13 +94,15 @@ impl Node { loop { tokio::select! { Some(message) = outgoing_message_rx.recv() => { - match message.message { - OutgoingMessageInner::Request(request) => { - swarm.behaviour_mut().request_response().send_request(&message.peer, request); + match message { + OutgoingMessage::Request((peer, request)) => { + swarm.behaviour_mut().request_response().send_request(&peer, request); } - OutgoingMessageInner::Response((channel, response)) => { - if let Err(_e) = swarm.behaviour_mut().request_response().send_response(channel, response) { + OutgoingMessage::Response((channel, response)) => { + println!("sending response on channel"); + if let Err(e) = swarm.behaviour_mut().request_response().send_response(channel, response) { // log error + println!("failed to send response: {e:?}"); } } } @@ -105,9 +111,22 @@ impl Node { match event { SwarmEvent::NewListenAddr { listener_id: _, - address: _, - } => {} - SwarmEvent::ExternalAddrConfirmed { address: _ } => {} + address, + } => { + println!("new listen address: {address}"); + } + SwarmEvent::ExternalAddrConfirmed { address } => { + println!("external address confirmed: {address}"); + } + SwarmEvent::ConnectionClosed { + peer_id, + cause, + endpoint: _, + connection_id: _, + num_established: _, + } => { + println!("connection closed with peer {peer_id}: {cause:?}"); + } SwarmEvent::Behaviour(event) => event.handle(incoming_message_tx.clone()).await, _ => continue, } @@ -230,6 +249,9 @@ impl NodeBuilder { .with_tokio() 
.with_other_transport(|_| transport)? .with_behaviour(|_| behaviour)? + .with_swarm_config(|cfg| { + cfg.with_idle_connection_timeout(Duration::from_secs(u64::MAX)) // don't disconnect from idle peers + }) .build(); if listen_addrs.is_empty() { @@ -277,23 +299,63 @@ mod test { use crate::message; #[tokio::test] - async fn two_nodes_can_connect() -> anyhow::Result<()> { - let node1 = NodeBuilder::new().with_get_task_logs().try_build().unwrap(); - let (node1, mut incoming_message_rx1, outgoing_message_tx1) = node1; + async fn two_nodes_can_connect_and_do_request_response() { + let (node1, mut incoming_message_rx1, outgoing_message_tx1) = + NodeBuilder::new().with_get_task_logs().try_build().unwrap(); + let node1_peer_id = node1.peer_id(); - let node2 = NodeBuilder::new() + let (node2, mut incoming_message_rx2, outgoing_message_tx2) = NodeBuilder::new() .with_get_task_logs() .with_bootnodes(node1.multiaddrs()) .try_build() .unwrap(); - let (node2, mut incoming_message_rx2, outgoing_message_tx2) = node2; + let node2_peer_id = node2.peer_id(); - // Start both nodes in separate tasks tokio::spawn(async move { node1.run().await }); tokio::spawn(async move { node2.run().await }); - let request = message::Request::GetTaskLogs; + // TODO: implement a way to get peer count + tokio::time::sleep(std::time::Duration::from_secs(2)).await; - Ok(()) + // send request from node1->node2 + let request = message::Request::GetTaskLogs; + outgoing_message_tx1 + .send(request.into_outgoing_message(node2_peer_id)) + .await + .unwrap(); + let message = incoming_message_rx2.recv().await.unwrap(); + assert_eq!(message.peer, node1_peer_id); + let libp2p::request_response::Message::Request { + request_id: _, + request: message::Request::GetTaskLogs, + channel, + } = message.message + else { + panic!("expected a GetTaskLogs request message"); + }; + + println!("received request from node1"); + + // send response from node2->node1 + let response = 
message::Response::GetTaskLogs(message::GetTaskLogsResponse { + logs: Ok(vec!["log1".to_string(), "log2".to_string()]), + }); + outgoing_message_tx2 + .send(response.into_outgoing_message(channel)) + .await + .unwrap(); + let message = incoming_message_rx1.recv().await.unwrap(); + assert_eq!(message.peer, node2_peer_id); + let libp2p::request_response::Message::Response { + request_id: _, + response: message::Response::GetTaskLogs(response), + } = message.message + else { + panic!("expected a GetTaskLogs response message"); + }; + assert_eq!( + response.logs, + Ok(vec!["log1".to_string(), "log2".to_string()]) + ); } } diff --git a/crates/p2p/src/message.rs b/crates/p2p/src/message.rs index 99b740db..97c07dff 100644 --- a/crates/p2p/src/message.rs +++ b/crates/p2p/src/message.rs @@ -9,14 +9,8 @@ pub struct IncomingMessage { } #[derive(Debug)] -pub struct OutgoingMessage { - pub peer: PeerId, - pub message: OutgoingMessageInner, -} - -#[derive(Debug)] -pub enum OutgoingMessageInner { - Request(Request), +pub enum OutgoingMessage { + Request((PeerId, Request)), Response( ( libp2p::request_response::ResponseChannel, @@ -34,6 +28,12 @@ pub enum Request { Restart, } +impl Request { + pub fn into_outgoing_message(self, peer: PeerId) -> OutgoingMessage { + OutgoingMessage::Request((peer, Request::from(self))) + } +} + #[derive(Debug, Clone, Serialize, Deserialize)] pub enum Response { ValidatorAuthentication(ValidatorAuthenticationResponse), @@ -43,6 +43,15 @@ pub enum Response { Restart(RestartResponse), } +impl Response { + pub fn into_outgoing_message( + self, + channel: libp2p::request_response::ResponseChannel, + ) -> OutgoingMessage { + OutgoingMessage::Response((channel, Response::from(self))) + } +} + #[derive(Debug, Clone, Serialize, Deserialize)] pub enum ValidatorAuthenticationRequest { Initiation(ValidationAuthenticationInitiationRequest), From 565ed95e32bd9e66208823ae629497be13063bb8 Mon Sep 17 00:00:00 2001 From: elizabeth Date: Mon, 7 Jul 2025 18:02:46 -0400 
Subject: [PATCH 03/38] clippy --- crates/p2p/src/behaviour.rs | 8 ++------ crates/p2p/src/lib.rs | 6 ++++++ crates/p2p/src/message.rs | 20 ++++++++++---------- 3 files changed, 18 insertions(+), 16 deletions(-) diff --git a/crates/p2p/src/behaviour.rs b/crates/p2p/src/behaviour.rs index cd6606bb..54f264dd 100644 --- a/crates/p2p/src/behaviour.rs +++ b/crates/p2p/src/behaviour.rs @@ -152,12 +152,8 @@ impl BehaviourEvent { BehaviourEvent::RequestResponse(event) => match event { request_response::Event::Message { peer, message } => { println!("received message from peer {peer:?}: {message:?}"); - let _ = message_tx - .send(IncomingMessage { - peer: peer.clone(), - message, - }) - .await; + // if this errors, user dropped their incoming message channel + let _ = message_tx.send(IncomingMessage { peer, message }).await; } request_response::Event::ResponseSent { peer, request_id } => { println!("response sent to peer {peer:?} for request ID {request_id:?}"); diff --git a/crates/p2p/src/lib.rs b/crates/p2p/src/lib.rs index 777e8689..9f07d8d0 100644 --- a/crates/p2p/src/lib.rs +++ b/crates/p2p/src/lib.rs @@ -145,6 +145,12 @@ pub struct NodeBuilder { bootnodes: Vec, } +impl Default for NodeBuilder { + fn default() -> Self { + Self::new() + } +} + impl NodeBuilder { pub fn new() -> Self { Self { diff --git a/crates/p2p/src/message.rs b/crates/p2p/src/message.rs index 97c07dff..54d757c1 100644 --- a/crates/p2p/src/message.rs +++ b/crates/p2p/src/message.rs @@ -30,7 +30,7 @@ pub enum Request { impl Request { pub fn into_outgoing_message(self, peer: PeerId) -> OutgoingMessage { - OutgoingMessage::Request((peer, Request::from(self))) + OutgoingMessage::Request((peer, self)) } } @@ -48,40 +48,40 @@ impl Response { self, channel: libp2p::request_response::ResponseChannel, ) -> OutgoingMessage { - OutgoingMessage::Response((channel, Response::from(self))) + OutgoingMessage::Response((channel, self)) } } #[derive(Debug, Clone, Serialize, Deserialize)] pub enum 
ValidatorAuthenticationRequest { - Initiation(ValidationAuthenticationInitiationRequest), - Solution(ValidationAuthenticationSolutionRequest), + Initiation(ValidatorAuthenticationInitiationRequest), + Solution(ValidatorAuthenticationSolutionRequest), } #[derive(Debug, Clone, Serialize, Deserialize)] pub enum ValidatorAuthenticationResponse { - Initiation(ValidationAuthenticationInitiationResponse), - Solution(ValidationAuthenticationSolutionResponse), + Initiation(ValidatorAuthenticationInitiationResponse), + Solution(ValidatorAuthenticationSolutionResponse), } #[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ValidationAuthenticationInitiationRequest { +pub struct ValidatorAuthenticationInitiationRequest { pub message: String, } #[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ValidationAuthenticationInitiationResponse { +pub struct ValidatorAuthenticationInitiationResponse { pub signed_message: String, pub message: String, } #[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ValidationAuthenticationSolutionRequest { +pub struct ValidatorAuthenticationSolutionRequest { pub signed_message: String, } #[derive(Debug, Clone, Serialize, Deserialize)] -pub enum ValidationAuthenticationSolutionResponse { +pub enum ValidatorAuthenticationSolutionResponse { Granted, Rejected, } From a5321211a3919879e4467f07a306a4ad661060c9 Mon Sep 17 00:00:00 2001 From: elizabeth Date: Mon, 7 Jul 2025 18:05:19 -0400 Subject: [PATCH 04/38] clippy --- crates/p2p/src/behaviour.rs | 1 + crates/p2p/src/message.rs | 1 + 2 files changed, 2 insertions(+) diff --git a/crates/p2p/src/behaviour.rs b/crates/p2p/src/behaviour.rs index 54f264dd..e2737d57 100644 --- a/crates/p2p/src/behaviour.rs +++ b/crates/p2p/src/behaviour.rs @@ -37,6 +37,7 @@ pub(crate) struct Behaviour { autonat: autonat::Behaviour, } +#[allow(clippy::large_enum_variant)] #[derive(Debug)] pub(crate) enum BehaviourEvent { Autonat(autonat::Event), diff --git a/crates/p2p/src/message.rs 
b/crates/p2p/src/message.rs index 54d757c1..9013a8ca 100644 --- a/crates/p2p/src/message.rs +++ b/crates/p2p/src/message.rs @@ -8,6 +8,7 @@ pub struct IncomingMessage { pub message: libp2p::request_response::Message, } +#[allow(clippy::large_enum_variant)] #[derive(Debug)] pub enum OutgoingMessage { Request((PeerId, Request)), From a548ce43802f19e12652158b40bee90abea135aa Mon Sep 17 00:00:00 2001 From: elizabeth Date: Tue, 8 Jul 2025 16:28:19 -0400 Subject: [PATCH 05/38] begin implementation of libp2p node in worker; working on msg handling --- Cargo.lock | 2 + Cargo.toml | 2 + crates/p2p/Cargo.toml | 1 + crates/p2p/src/lib.rs | 26 +++- crates/p2p/src/message.rs | 16 +- crates/worker/Cargo.toml | 2 + crates/worker/src/p2p/mod.rs | 274 +++++++++++++++++++++++++++++++++++ 7 files changed, 319 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a64a46e4..89b858dd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6265,6 +6265,7 @@ dependencies = [ "libp2p", "serde", "tokio", + "tokio-util", "void", ] @@ -10344,6 +10345,7 @@ dependencies = [ "log", "nalgebra", "nvml-wrapper", + "p2p", "rand 0.8.5", "rand 0.9.1", "rand_core 0.6.4", diff --git a/Cargo.toml b/Cargo.toml index 4279f156..d4ca7ab8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,6 +12,8 @@ resolver = "2" [workspace.dependencies] shared = { path = "crates/shared" } +p2p = { path = "crates/p2p" } + actix-web = "4.9.0" clap = { version = "4.5.27", features = ["derive"] } serde = { version = "1.0.219", features = ["derive"] } diff --git a/crates/p2p/Cargo.toml b/crates/p2p/Cargo.toml index 2d5d94ff..46f9833a 100644 --- a/crates/p2p/Cargo.toml +++ b/crates/p2p/Cargo.toml @@ -10,6 +10,7 @@ void = "1.0" anyhow = {workspace = true} serde = {workspace = true} tokio = {workspace = true, features = ["sync"]} +tokio-util = { workspace = true, features = ["rt"] } [lints] workspace = true diff --git a/crates/p2p/src/lib.rs b/crates/p2p/src/lib.rs index 9f07d8d0..c02f5f09 100644 --- a/crates/p2p/src/lib.rs 
+++ b/crates/p2p/src/lib.rs @@ -8,7 +8,7 @@ use libp2p::yamux; use libp2p::Multiaddr; use libp2p::Swarm; use libp2p::SwarmBuilder; -use libp2p::{identity, PeerId, Transport}; +use libp2p::{identity, Transport}; use std::time::Duration; mod behaviour; @@ -16,9 +16,13 @@ mod message; mod protocol; use behaviour::Behaviour; -use message::{IncomingMessage, OutgoingMessage}; use protocol::Protocols; +pub use message::*; +pub type Libp2pIncomingMessage = libp2p::request_response::Message; +pub type ResponseChannel = libp2p::request_response::ResponseChannel; +pub type PeerId = libp2p::PeerId; + pub const PRIME_STREAM_PROTOCOL: libp2p::StreamProtocol = libp2p::StreamProtocol::new("/prime/1.0.0"); // TODO: force this to be passed by the user @@ -29,6 +33,7 @@ pub struct Node { listen_addrs: Vec, swarm: Swarm, bootnodes: Vec, + cancellation_token: tokio_util::sync::CancellationToken, // channel for sending incoming messages to the consumer of this library incoming_message_tx: tokio::sync::mpsc::Sender, @@ -66,6 +71,7 @@ impl Node { listen_addrs, mut swarm, bootnodes, + cancellation_token, incoming_message_tx, mut outgoing_message_rx, } = self; @@ -93,6 +99,10 @@ impl Node { loop { tokio::select! 
{ + _ = cancellation_token.cancelled() => { + println!("cancellation token triggered, shutting down node"); + break Ok(()); + } Some(message) = outgoing_message_rx.recv() => { match message { OutgoingMessage::Request((peer, request)) => { @@ -143,6 +153,7 @@ pub struct NodeBuilder { agent_version: Option, protocols: Protocols, bootnodes: Vec, + cancellation_token: Option, } impl Default for NodeBuilder { @@ -160,6 +171,7 @@ impl NodeBuilder { agent_version: None, protocols: Protocols::new(), bootnodes: Vec::new(), + cancellation_token: None, } } @@ -224,6 +236,14 @@ impl NodeBuilder { self } + pub fn with_cancellation_token( + mut self, + cancellation_token: tokio_util::sync::CancellationToken, + ) -> Self { + self.cancellation_token = Some(cancellation_token); + self + } + pub fn try_build( self, ) -> Result<( @@ -238,6 +258,7 @@ impl NodeBuilder { agent_version, protocols, bootnodes, + cancellation_token, } = self; let keypair = keypair.unwrap_or(identity::Keypair::generate_ed25519()); @@ -279,6 +300,7 @@ impl NodeBuilder { bootnodes, incoming_message_tx, outgoing_message_rx, + cancellation_token: cancellation_token.unwrap_or_default(), }, incoming_message_rx, outgoing_message_tx, diff --git a/crates/p2p/src/message.rs b/crates/p2p/src/message.rs index 9013a8ca..62b01501 100644 --- a/crates/p2p/src/message.rs +++ b/crates/p2p/src/message.rs @@ -72,13 +72,19 @@ pub struct ValidatorAuthenticationInitiationRequest { #[derive(Debug, Clone, Serialize, Deserialize)] pub struct ValidatorAuthenticationInitiationResponse { - pub signed_message: String, + pub signature: String, pub message: String, } +impl From for Response { + fn from(response: ValidatorAuthenticationInitiationResponse) -> Self { + Response::ValidatorAuthentication(ValidatorAuthenticationResponse::Initiation(response)) + } +} + #[derive(Debug, Clone, Serialize, Deserialize)] pub struct ValidatorAuthenticationSolutionRequest { - pub signed_message: String, + pub signature: String, } #[derive(Debug, Clone, 
Serialize, Deserialize)] @@ -87,6 +93,12 @@ pub enum ValidatorAuthenticationSolutionResponse { Rejected, } +impl From for Response { + fn from(response: ValidatorAuthenticationSolutionResponse) -> Self { + Response::ValidatorAuthentication(ValidatorAuthenticationResponse::Solution(response)) + } +} + #[derive(Debug, Clone, Serialize, Deserialize)] pub struct HardwareChallengeRequest { pub challenge: String, // TODO diff --git a/crates/worker/Cargo.toml b/crates/worker/Cargo.toml index 18596ba5..3ee03e12 100644 --- a/crates/worker/Cargo.toml +++ b/crates/worker/Cargo.toml @@ -8,6 +8,8 @@ workspace = true [dependencies] shared = { workspace = true } +p2p = { workspace = true } + actix-web = { workspace = true } bollard = "0.18.1" clap = { workspace = true } diff --git a/crates/worker/src/p2p/mod.rs b/crates/worker/src/p2p/mod.rs index 9393f985..3cc35009 100644 --- a/crates/worker/src/p2p/mod.rs +++ b/crates/worker/src/p2p/mod.rs @@ -2,3 +2,277 @@ pub(crate) mod service; pub(crate) use service::P2PContext; pub(crate) use service::P2PService; + +use anyhow::Context as _; +use anyhow::Result; +use p2p::Node; +use p2p::NodeBuilder; +use p2p::PeerId; +use p2p::Response; +use p2p::{IncomingMessage, Libp2pIncomingMessage, OutgoingMessage}; +use shared::web3::wallet::Wallet; +use std::collections::HashMap; +use std::collections::HashSet; +use std::sync::Arc; +use std::time::SystemTime; +use tokio::sync::mpsc::{Receiver, Sender}; +use tokio::sync::RwLock; +use tokio_util::sync::CancellationToken; + +fn build_p2p_node( + port: u16, + cancellation_token: CancellationToken, +) -> Result<(Node, Receiver, Sender)> { + NodeBuilder::new() + .with_port(port) + .with_validator_authentication() + .with_hardware_challenge() + .with_invite() + .with_get_task_logs() + .with_restart() + .with_cancellation_token(cancellation_token) + .try_build() +} + +pub(crate) struct Service { + node: Node, + incoming_messages: Receiver, + cancellation_token: CancellationToken, + context: Context, +} + 
+impl Service { + pub(crate) fn new( + port: u16, + wallet: Wallet, + validator_addresses: HashSet, + cancellation_token: CancellationToken, + ) -> Result { + let (node, incoming_messages, outgoing_messages) = + build_p2p_node(port, cancellation_token.clone()).context("failed to build p2p node")?; + Ok(Self { + node, + incoming_messages, + cancellation_token, + context: Context::new(wallet, outgoing_messages, validator_addresses), + }) + } + + pub(crate) async fn run(self) { + let Self { + node, + mut incoming_messages, + cancellation_token, + context, + } = self; + + loop { + tokio::select! { + _ = cancellation_token.cancelled() => { + break; + } + Some(message) = (&mut incoming_messages).recv() => { + // TODO: spawn and store handles + if let Err(e) = handle_incoming_message(message, context.clone()) + .await { + tracing::error!("failed to handle incoming message: {e}"); + } + } + } + } + } +} + +#[derive(Clone)] +struct Context { + authorized_peers: Arc>>, + ongoing_auth_challenges: Arc>>, // use request_id? 
+ nonce_cache: Arc>>, + wallet: Wallet, + outgoing_messages: Sender, + validator_addresses: Arc>>, +} + +impl Context { + fn new( + wallet: Wallet, + outgoing_messages: Sender, + validator_addresses: HashSet, + ) -> Self { + Self { + authorized_peers: Arc::new(RwLock::new(HashSet::new())), + ongoing_auth_challenges: Arc::new(RwLock::new(HashMap::new())), + nonce_cache: Arc::new(RwLock::new(HashMap::new())), + wallet, + outgoing_messages, + validator_addresses: Arc::new(RwLock::new(validator_addresses)), + } + } +} + +async fn handle_incoming_message(message: IncomingMessage, context: Context) -> Result<()> { + match message.message { + Libp2pIncomingMessage::Request { + request_id: _, + request, + channel, + } => { + tracing::debug!("received incoming request {request:?}"); + handle_incoming_request(message.peer, request, channel, context).await?; + } + Libp2pIncomingMessage::Response { + request_id: _, + response, + } => { + tracing::debug!("received incoming response {response:?}"); + handle_incoming_response(response).await?; + } + } + Ok(()) +} + +async fn handle_incoming_request( + from: PeerId, + request: p2p::Request, + channel: p2p::ResponseChannel, + context: Context, +) -> Result<()> { + match request { + p2p::Request::ValidatorAuthentication(req) => { + tracing::debug!("handling ValidatorAuthentication request"); + match req { + p2p::ValidatorAuthenticationRequest::Initiation(req) => { + let resp = + handle_validator_authentication_initiation_request(from, req, &context) + .await + .context("failed to handle ValidatorAuthenticationInitiationRequest")?; + let outgoing_message = resp.into_outgoing_message(channel); + context + .outgoing_messages + .send(outgoing_message) + .await + .context("failed to send ValidatorAuthentication response")?; + } + p2p::ValidatorAuthenticationRequest::Solution(req) => { + let resp = match handle_validator_authentication_initiation_solution( + from, req, &context, + ) + .await + { + Ok(resp) => resp, + Err(e) => { + 
tracing::error!( + "failed to handle ValidatorAuthenticationSolutionRequest: {e}" + ); + p2p::ValidatorAuthenticationSolutionResponse::Rejected.into() + } + }; + let outgoing_message = resp.into_outgoing_message(channel); + context + .outgoing_messages + .send(outgoing_message) + .await + .context("failed to send ValidatorAuthenticationSolution response")?; + } + } + } + p2p::Request::HardwareChallenge(req) => { + tracing::debug!("handling HardwareChallenge request"); + } + p2p::Request::Invite(_) => { + tracing::debug!("handling Invite request"); + } + p2p::Request::GetTaskLogs => { + tracing::debug!("handling GetTaskLogs request"); + } + p2p::Request::Restart => { + tracing::debug!("handling Restart request"); + } + } + Ok(()) +} + +async fn handle_validator_authentication_initiation_request( + from: PeerId, + req: p2p::ValidatorAuthenticationInitiationRequest, + context: &Context, +) -> Result { + use rand_v8::Rng as _; + use shared::security::request_signer::sign_message; + + // generate a fresh cryptographically secure challenge message for this auth attempt + let challenge_bytes: [u8; 32] = rand_v8::rngs::OsRng.gen(); + let challenge_message = hex::encode(challenge_bytes); + let signature = sign_message(&req.message, &context.wallet) + .await + .map_err(|e| anyhow::anyhow!("failed to sign message: {e:?}"))?; + + // store the challenge message in nonce cache to prevent replay + let mut nonce_cache = context.nonce_cache.write().await; + nonce_cache.insert(challenge_message.clone(), SystemTime::now()); + + // store the current challenge for this peer + let mut ongoing_auth_challenges = context.ongoing_auth_challenges.write().await; + ongoing_auth_challenges.insert(from, challenge_message.clone()); + + Ok(p2p::ValidatorAuthenticationInitiationResponse { + message: challenge_message, + signature, + } + .into()) +} + +async fn handle_validator_authentication_initiation_solution( + from: PeerId, + req: p2p::ValidatorAuthenticationSolutionRequest, + context: 
&Context, +) -> Result { + use std::str::FromStr as _; + + let mut ongoing_auth_challenges = context.ongoing_auth_challenges.write().await; + let challenge_message = ongoing_auth_challenges + .remove(&from) + .ok_or_else(|| anyhow::anyhow!("no ongoing authentication challenge for peer {from}"))?; + + let mut nonce_cache = context.nonce_cache.write().await; + if nonce_cache.remove(&challenge_message).is_none() { + anyhow::bail!("challenge message {challenge_message} not found in nonce cache"); + } + + let Ok(signature) = alloy::primitives::Signature::from_str(&req.signature) else { + anyhow::bail!("failed to parse signature from message"); + }; + + let Ok(recovered_address) = signature.recover_address_from_msg(challenge_message) else { + anyhow::bail!("failed to recover address from signature and message"); + }; + + let validator_addresses = context.validator_addresses.read().await; + if !validator_addresses.contains(&recovered_address) { + anyhow::bail!("recovered address {recovered_address} is not in the list of authorized validator addresses"); + } + + Ok(p2p::ValidatorAuthenticationSolutionResponse::Granted.into()) +} + +async fn handle_incoming_response(response: p2p::Response) -> Result<()> { + match response { + p2p::Response::ValidatorAuthentication(_) => { + // critical developer error, could panic here + tracing::error!("worker should never receive ValidatorAuthentication responses"); + } + p2p::Response::HardwareChallenge(_) => { + tracing::debug!("handling HardwareChallenge response"); + } + p2p::Response::Invite(_) => { + tracing::debug!("handling Invite response"); + } + p2p::Response::GetTaskLogs(_) => { + tracing::debug!("handling GetTaskLogs response"); + } + p2p::Response::Restart(_) => { + tracing::debug!("handling Restart response"); + } + } + Ok(()) +} From d780aae61896777f21cc94cd6c4e1cfde5f040e1 Mon Sep 17 00:00:00 2001 From: elizabeth Date: Tue, 8 Jul 2025 17:10:43 -0400 Subject: [PATCH 06/38] implement more request handlers --- 
crates/p2p/src/message.rs | 34 +++++++- crates/worker/src/p2p/mod.rs | 149 +++++++++++++++++++++++------------ 2 files changed, 128 insertions(+), 55 deletions(-) diff --git a/crates/p2p/src/message.rs b/crates/p2p/src/message.rs index 62b01501..5c9b1e70 100644 --- a/crates/p2p/src/message.rs +++ b/crates/p2p/src/message.rs @@ -111,6 +111,12 @@ pub struct HardwareChallengeResponse { pub timestamp: SystemTime, } +impl From for Response { + fn from(response: HardwareChallengeResponse) -> Self { + Response::HardwareChallenge(response) + } +} + #[derive(Debug, Clone, Serialize, Deserialize)] pub enum InviteRequestUrl { MasterUrl(String), @@ -133,12 +139,32 @@ pub enum InviteResponse { Error(String), } +impl From for Response { + fn from(response: InviteResponse) -> Self { + Response::Invite(response) + } +} + #[derive(Debug, Clone, Serialize, Deserialize)] -pub struct GetTaskLogsResponse { - pub logs: Result, String>, +pub enum GetTaskLogsResponse { + Ok(String), + Error(String), +} + +impl From for Response { + fn from(response: GetTaskLogsResponse) -> Self { + Response::GetTaskLogs(response) + } } #[derive(Debug, Clone, Serialize, Deserialize)] -pub struct RestartResponse { - pub result: Result<(), String>, +pub enum RestartResponse { + Ok, + Error(String), +} + +impl From for Response { + fn from(response: RestartResponse) -> Self { + Response::Restart(response) + } } diff --git a/crates/worker/src/p2p/mod.rs b/crates/worker/src/p2p/mod.rs index 3cc35009..ef9978b3 100644 --- a/crates/worker/src/p2p/mod.rs +++ b/crates/worker/src/p2p/mod.rs @@ -19,20 +19,7 @@ use tokio::sync::mpsc::{Receiver, Sender}; use tokio::sync::RwLock; use tokio_util::sync::CancellationToken; -fn build_p2p_node( - port: u16, - cancellation_token: CancellationToken, -) -> Result<(Node, Receiver, Sender)> { - NodeBuilder::new() - .with_port(port) - .with_validator_authentication() - .with_hardware_challenge() - .with_invite() - .with_get_task_logs() - .with_restart() - 
.with_cancellation_token(cancellation_token) - .try_build() -} +use crate::docker::DockerService; pub(crate) struct Service { node: Node, @@ -46,6 +33,7 @@ impl Service { port: u16, wallet: Wallet, validator_addresses: HashSet, + docker_service: Arc, cancellation_token: CancellationToken, ) -> Result { let (node, incoming_messages, outgoing_messages) = @@ -54,13 +42,18 @@ impl Service { node, incoming_messages, cancellation_token, - context: Context::new(wallet, outgoing_messages, validator_addresses), + context: Context::new( + wallet, + outgoing_messages, + validator_addresses, + docker_service, + ), }) } pub(crate) async fn run(self) { let Self { - node, + node: _, mut incoming_messages, cancellation_token, context, @@ -83,6 +76,21 @@ impl Service { } } +fn build_p2p_node( + port: u16, + cancellation_token: CancellationToken, +) -> Result<(Node, Receiver, Sender)> { + NodeBuilder::new() + .with_port(port) + .with_validator_authentication() + .with_hardware_challenge() + .with_invite() + .with_get_task_logs() + .with_restart() + .with_cancellation_token(cancellation_token) + .try_build() +} + #[derive(Clone)] struct Context { authorized_peers: Arc>>, @@ -90,7 +98,8 @@ struct Context { nonce_cache: Arc>>, wallet: Wallet, outgoing_messages: Sender, - validator_addresses: Arc>>, + validator_addresses: Arc>, + docker_service: Arc, } impl Context { @@ -98,6 +107,7 @@ impl Context { wallet: Wallet, outgoing_messages: Sender, validator_addresses: HashSet, + docker_service: Arc, ) -> Self { Self { authorized_peers: Arc::new(RwLock::new(HashSet::new())), @@ -105,7 +115,8 @@ impl Context { nonce_cache: Arc::new(RwLock::new(HashMap::new())), wallet, outgoing_messages, - validator_addresses: Arc::new(RwLock::new(validator_addresses)), + validator_addresses: Arc::new(validator_addresses), + docker_service, } } } @@ -137,58 +148,55 @@ async fn handle_incoming_request( channel: p2p::ResponseChannel, context: Context, ) -> Result<()> { - match request { + let resp = match 
request { p2p::Request::ValidatorAuthentication(req) => { tracing::debug!("handling ValidatorAuthentication request"); match req { p2p::ValidatorAuthenticationRequest::Initiation(req) => { - let resp = - handle_validator_authentication_initiation_request(from, req, &context) - .await - .context("failed to handle ValidatorAuthenticationInitiationRequest")?; - let outgoing_message = resp.into_outgoing_message(channel); - context - .outgoing_messages - .send(outgoing_message) + handle_validator_authentication_initiation_request(from, req, &context) .await - .context("failed to send ValidatorAuthentication response")?; + .context("failed to handle ValidatorAuthenticationInitiationRequest")? } p2p::ValidatorAuthenticationRequest::Solution(req) => { - let resp = match handle_validator_authentication_initiation_solution( - from, req, &context, - ) - .await + match handle_validator_authentication_initiation_solution(from, req, &context) + .await { - Ok(resp) => resp, + Ok(resp) => p2p::ValidatorAuthenticationSolutionResponse::Granted.into(), Err(e) => { tracing::error!( "failed to handle ValidatorAuthenticationSolutionRequest: {e}" ); p2p::ValidatorAuthenticationSolutionResponse::Rejected.into() } - }; - let outgoing_message = resp.into_outgoing_message(channel); - context - .outgoing_messages - .send(outgoing_message) - .await - .context("failed to send ValidatorAuthenticationSolution response")?; + } } } } - p2p::Request::HardwareChallenge(req) => { + p2p::Request::HardwareChallenge(_) => { tracing::debug!("handling HardwareChallenge request"); + todo!() } p2p::Request::Invite(_) => { tracing::debug!("handling Invite request"); + handle_invite_request(from, request, &context).await } p2p::Request::GetTaskLogs => { tracing::debug!("handling GetTaskLogs request"); + handle_get_task_logs_request(from, &context).await } p2p::Request::Restart => { tracing::debug!("handling Restart request"); + handle_restart_request(from, &context).await } - } + }; + + let outgoing_message = 
resp.into_outgoing_message(channel); + context + .outgoing_messages + .send(outgoing_message) + .await + .context("failed to send ValidatorAuthentication response")?; + Ok(()) } @@ -226,7 +234,7 @@ async fn handle_validator_authentication_initiation_solution( from: PeerId, req: p2p::ValidatorAuthenticationSolutionRequest, context: &Context, -) -> Result { +) -> Result<()> { use std::str::FromStr as _; let mut ongoing_auth_challenges = context.ongoing_auth_challenges.write().await; @@ -247,31 +255,70 @@ async fn handle_validator_authentication_initiation_solution( anyhow::bail!("failed to recover address from signature and message"); }; - let validator_addresses = context.validator_addresses.read().await; - if !validator_addresses.contains(&recovered_address) { + if !context.validator_addresses.contains(&recovered_address) { anyhow::bail!("recovered address {recovered_address} is not in the list of authorized validator addresses"); } - Ok(p2p::ValidatorAuthenticationSolutionResponse::Granted.into()) + Ok(()) +} + +async fn handle_invite_request( + from: PeerId, + _request: p2p::Request, + context: &Context, +) -> Response { + let authorized_peers = context.authorized_peers.read().await; + if !authorized_peers.contains(&from) { + return p2p::InviteResponse::Error("unauthorized".to_string()).into(); + } + + p2p::InviteResponse::Ok.into() +} + +async fn handle_get_task_logs_request(from: PeerId, context: &Context) -> Response { + let authorized_peers = context.authorized_peers.read().await; + if !authorized_peers.contains(&from) { + return p2p::GetTaskLogsResponse::Error("unauthorized".to_string()).into(); + } + + match context.docker_service.get_logs().await { + Ok(logs) => p2p::GetTaskLogsResponse::Ok(logs).into(), + Err(e) => { + return p2p::GetTaskLogsResponse::Error(format!("failed to get task logs: {e:?}")) + .into(); + } + } +} + +async fn handle_restart_request(from: PeerId, context: &Context) -> Response { + let authorized_peers = 
context.authorized_peers.read().await; + if !authorized_peers.contains(&from) { + return p2p::RestartResponse::Error("unauthorized".to_string()).into(); + } + + match context.docker_service.restart_task().await { + Ok(()) => p2p::RestartResponse::Ok.into(), + Err(e) => p2p::RestartResponse::Error(format!("failed to restart task: {e:?}")).into(), + } } async fn handle_incoming_response(response: p2p::Response) -> Result<()> { + // critical developer error if any of these happen, could panic here match response { p2p::Response::ValidatorAuthentication(_) => { - // critical developer error, could panic here tracing::error!("worker should never receive ValidatorAuthentication responses"); } p2p::Response::HardwareChallenge(_) => { - tracing::debug!("handling HardwareChallenge response"); + tracing::error!("worker should never receive HardwareChallenge responses"); } p2p::Response::Invite(_) => { - tracing::debug!("handling Invite response"); + tracing::error!("worker should never receive Invite responses"); } p2p::Response::GetTaskLogs(_) => { - tracing::debug!("handling GetTaskLogs response"); + tracing::error!("worker should never receive GetTaskLogs responses"); } p2p::Response::Restart(_) => { - tracing::debug!("handling Restart response"); + tracing::error!("worker should never receive Restart responses"); } } Ok(()) From bcaa44402fa86210b135fb2dee171facd9126dc7 Mon Sep 17 00:00:00 2001 From: elizabeth Date: Tue, 8 Jul 2025 17:29:47 -0400 Subject: [PATCH 07/38] impl hardware challenge, add new p2p to worker cli --- Cargo.lock | 1 + crates/p2p/Cargo.toml | 1 + crates/p2p/src/challenge_message.rs | 89 +++++++++++++++++++++++++++++ crates/p2p/src/lib.rs | 19 +++--- crates/p2p/src/message.rs | 7 ++- crates/worker/src/cli/command.rs | 67 ++++++++++++---------- crates/worker/src/p2p/mod.rs | 54 +++++++++++------ 7 files changed, 182 insertions(+), 56 deletions(-) create mode 100644 crates/p2p/src/challenge_message.rs diff --git a/Cargo.lock b/Cargo.lock index 
89b858dd..200a21e8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6263,6 +6263,7 @@ version = "0.3.11" dependencies = [ "anyhow", "libp2p", + "nalgebra", "serde", "tokio", "tokio-util", diff --git a/crates/p2p/Cargo.toml b/crates/p2p/Cargo.toml index 46f9833a..ba52d570 100644 --- a/crates/p2p/Cargo.toml +++ b/crates/p2p/Cargo.toml @@ -8,6 +8,7 @@ libp2p = { version = "0.54", features = ["request-response", "identify", "ping", void = "1.0" anyhow = {workspace = true} +nalgebra = {workspace = true} serde = {workspace = true} tokio = {workspace = true, features = ["sync"]} tokio-util = { workspace = true, features = ["rt"] } diff --git a/crates/p2p/src/challenge_message.rs b/crates/p2p/src/challenge_message.rs new file mode 100644 index 00000000..639cc602 --- /dev/null +++ b/crates/p2p/src/challenge_message.rs @@ -0,0 +1,89 @@ +use nalgebra::DMatrix; +use serde::{ + de::{self, Visitor}, + Deserialize, Deserializer, Serialize, Serializer, +}; +use std::fmt; + +#[derive(Debug, Clone)] +pub struct FixedF64(pub f64); + +impl Serialize for FixedF64 { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + // adjust precision as needed + serializer.serialize_str(&format!("{:.12}", self.0)) + } +} + +impl<'de> Deserialize<'de> for FixedF64 { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + struct FixedF64Visitor; + + impl Visitor<'_> for FixedF64Visitor { + type Value = FixedF64; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("a string representing a fixed precision float") + } + + fn visit_str(self, value: &str) -> Result + where + E: de::Error, + { + value + .parse::() + .map(FixedF64) + .map_err(|_| E::custom(format!("invalid f64: {value}"))) + } + } + + deserializer.deserialize_str(FixedF64Visitor) + } +} + +impl PartialEq for FixedF64 { + fn eq(&self, other: &Self) -> bool { + format!("{:.10}", self.0) == format!("{:.10}", other.0) + } +} + +#[derive(Deserialize, 
Serialize, Debug, Clone, PartialEq)] +pub struct ChallengeRequest { + pub rows_a: usize, + pub cols_a: usize, + pub data_a: Vec, + pub rows_b: usize, + pub cols_b: usize, + pub data_b: Vec, + pub timestamp: Option, +} + +#[derive(Deserialize, Serialize, Debug, Clone, PartialEq)] +pub struct ChallengeResponse { + pub result: Vec, + pub rows: usize, + pub cols: usize, +} + +pub fn calc_matrix(req: &ChallengeRequest) -> ChallengeResponse { + // convert FixedF64 to f64 + let data_a: Vec = req.data_a.iter().map(|x| x.0).collect(); + let data_b: Vec = req.data_b.iter().map(|x| x.0).collect(); + let a = DMatrix::from_vec(req.rows_a, req.cols_a, data_a); + let b = DMatrix::from_vec(req.rows_b, req.cols_b, data_b); + let c = a * b; + + let data_c: Vec = c.iter().map(|x| FixedF64(*x)).collect(); + + ChallengeResponse { + rows: c.nrows(), + cols: c.ncols(), + result: data_c, + } +} diff --git a/crates/p2p/src/lib.rs b/crates/p2p/src/lib.rs index c02f5f09..46105a36 100644 --- a/crates/p2p/src/lib.rs +++ b/crates/p2p/src/lib.rs @@ -5,23 +5,27 @@ use libp2p::noise; use libp2p::swarm::SwarmEvent; use libp2p::tcp; use libp2p::yamux; -use libp2p::Multiaddr; use libp2p::Swarm; use libp2p::SwarmBuilder; use libp2p::{identity, Transport}; use std::time::Duration; mod behaviour; +mod challenge_message; mod message; mod protocol; use behaviour::Behaviour; use protocol::Protocols; +// TODO: put these in a mod +pub use challenge_message::*; pub use message::*; + pub type Libp2pIncomingMessage = libp2p::request_response::Message; pub type ResponseChannel = libp2p::request_response::ResponseChannel; pub type PeerId = libp2p::PeerId; +pub type Multiaddr = libp2p::Multiaddr; pub const PRIME_STREAM_PROTOCOL: libp2p::StreamProtocol = libp2p::StreamProtocol::new("/prime/1.0.0"); @@ -365,9 +369,8 @@ mod test { println!("received request from node1"); // send response from node2->node1 - let response = message::Response::GetTaskLogs(message::GetTaskLogsResponse { - logs: 
Ok(vec!["log1".to_string(), "log2".to_string()]), - }); + let response = + message::Response::GetTaskLogs(message::GetTaskLogsResponse::Ok("logs".to_string())); outgoing_message_tx2 .send(response.into_outgoing_message(channel)) .await @@ -381,9 +384,9 @@ mod test { else { panic!("expected a GetTaskLogs response message"); }; - assert_eq!( - response.logs, - Ok(vec!["log1".to_string(), "log2".to_string()]) - ); + let message::GetTaskLogsResponse::Ok(logs) = response else { + panic!("expected a successful GetTaskLogs response"); + }; + assert_eq!(logs, "logs"); } } diff --git a/crates/p2p/src/message.rs b/crates/p2p/src/message.rs index 5c9b1e70..c0fd2d66 100644 --- a/crates/p2p/src/message.rs +++ b/crates/p2p/src/message.rs @@ -2,6 +2,9 @@ use libp2p::PeerId; use serde::{Deserialize, Serialize}; use std::time::SystemTime; +use crate::ChallengeRequest; +use crate::ChallengeResponse; + #[derive(Debug)] pub struct IncomingMessage { pub peer: PeerId, @@ -101,13 +104,13 @@ impl From for Response { #[derive(Debug, Clone, Serialize, Deserialize)] pub struct HardwareChallengeRequest { - pub challenge: String, // TODO + pub challenge: ChallengeRequest, pub timestamp: SystemTime, } #[derive(Debug, Clone, Serialize, Deserialize)] pub struct HardwareChallengeResponse { - pub response: String, // TODO + pub response: ChallengeResponse, pub timestamp: SystemTime, } diff --git a/crates/worker/src/cli/command.rs b/crates/worker/src/cli/command.rs index 92de379e..5698568e 100644 --- a/crates/worker/src/cli/command.rs +++ b/crates/worker/src/cli/command.rs @@ -9,8 +9,6 @@ use crate::metrics::store::MetricsStore; use crate::operations::compute_node::ComputeNodeOperations; use crate::operations::heartbeat::service::HeartbeatService; use crate::operations::provider::ProviderOperations; -use crate::p2p::P2PContext; -use crate::p2p::P2PService; use crate::services::discovery::DiscoveryService; use crate::services::discovery_updater::DiscoveryUpdater; use 
crate::state::system_state::SystemState; @@ -701,14 +699,14 @@ pub async fn execute_command( } }; - let p2p_context = P2PContext { - docker_service: docker_service.clone(), - heartbeat_service: heartbeat.clone(), - system_state: state.clone(), - contracts: contracts.clone(), - node_wallet: node_wallet_instance.clone(), - provider_wallet: provider_wallet_instance.clone(), - }; + // let p2p_context = P2PContext { + // docker_service: docker_service.clone(), + // heartbeat_service: heartbeat.clone(), + // system_state: state.clone(), + // contracts: contracts.clone(), + // node_wallet: node_wallet_instance.clone(), + // provider_wallet: provider_wallet_instance.clone(), + // }; let validators = match contracts.prime_network.get_validator_role().await { Ok(validators) => validators, @@ -728,15 +726,31 @@ pub async fn execute_command( let mut allowed_addresses = vec![pool_info.creator, pool_info.compute_manager_key]; allowed_addresses.extend(validators); - let p2p_service = match P2PService::new( - state.worker_p2p_seed, - cancellation_token.clone(), - Some(p2p_context), + // let p2p_service = match P2PService::new( + // state.worker_p2p_seed, + // cancellation_token.clone(), + // Some(p2p_context), + // node_wallet_instance.clone(), + // allowed_addresses, + // ) + // .await + // { + // Ok(service) => service, + // Err(e) => { + // error!("❌ Failed to start P2P service: {e}"); + // std::process::exit(1); + // } + // }; + + let port = 0; // TODO: cli option + let validator_addresses = std::collections::HashSet::from_iter(allowed_addresses); + let p2p_service = match crate::p2p::Service::new( + port, node_wallet_instance.clone(), - allowed_addresses, - ) - .await - { + validator_addresses, + docker_service.clone(), + cancellation_token.clone(), + ) { Ok(service) => service, Err(e) => { error!("❌ Failed to start P2P service: {e}"); @@ -744,23 +758,18 @@ pub async fn execute_command( } }; - if let Err(e) = p2p_service.start() { - error!("❌ Failed to start P2P listener: 
{e}"); - std::process::exit(1); - } - - node_config.worker_p2p_id = Some(p2p_service.node_id().to_string()); + let peer_id = p2p_service.peer_id(); + node_config.worker_p2p_id = Some(peer_id.to_string()); node_config.worker_p2p_addresses = Some( p2p_service - .listening_addresses() + .listen_addrs() .iter() .map(|addr| addr.to_string()) .collect(), ); - Console::success(&format!( - "P2P service started with ID: {}", - p2p_service.node_id() - )); + tokio::task::spawn(p2p_service.run()); + + Console::success(&format!("P2P service started with ID: {peer_id}",)); let mut attempts = 0; let max_attempts = 100; diff --git a/crates/worker/src/p2p/mod.rs b/crates/worker/src/p2p/mod.rs index ef9978b3..78b8927c 100644 --- a/crates/worker/src/p2p/mod.rs +++ b/crates/worker/src/p2p/mod.rs @@ -1,8 +1,3 @@ -pub(crate) mod service; - -pub(crate) use service::P2PContext; -pub(crate) use service::P2PService; - use anyhow::Context as _; use anyhow::Result; use p2p::Node; @@ -51,6 +46,14 @@ impl Service { }) } + pub(crate) fn peer_id(&self) -> PeerId { + self.node.peer_id() + } + + pub(crate) fn listen_addrs(&self) -> &[p2p::Multiaddr] { + self.node.listen_addrs() + } + pub(crate) async fn run(self) { let Self { node: _, @@ -64,7 +67,7 @@ impl Service { _ = cancellation_token.cancelled() => { break; } - Some(message) = (&mut incoming_messages).recv() => { + Some(message) = incoming_messages.recv() => { // TODO: spawn and store handles if let Err(e) = handle_incoming_message(message, context.clone()) .await { @@ -136,7 +139,7 @@ async fn handle_incoming_message(message: IncomingMessage, context: Context) -> response, } => { tracing::debug!("received incoming response {response:?}"); - handle_incoming_response(response).await?; + handle_incoming_response(response); } } Ok(()) @@ -161,10 +164,10 @@ async fn handle_incoming_request( match handle_validator_authentication_initiation_solution(from, req, &context) .await { - Ok(resp) => 
p2p::ValidatorAuthenticationSolutionResponse::Granted.into(), + Ok(()) => p2p::ValidatorAuthenticationSolutionResponse::Granted.into(), Err(e) => { tracing::error!( - "failed to handle ValidatorAuthenticationSolutionRequest: {e}" + "failed to handle ValidatorAuthenticationSolutionRequest: {e:?}" ); p2p::ValidatorAuthenticationSolutionResponse::Rejected.into() } @@ -172,9 +175,11 @@ async fn handle_incoming_request( } } } - p2p::Request::HardwareChallenge(_) => { + p2p::Request::HardwareChallenge(req) => { tracing::debug!("handling HardwareChallenge request"); - todo!() + handle_hardware_challenge_request(from, req, &context) + .await + .context("failed to handle HardwareChallenge request")? } p2p::Request::Invite(_) => { tracing::debug!("handling Invite request"); @@ -262,6 +267,25 @@ async fn handle_validator_authentication_initiation_solution( Ok(()) } +async fn handle_hardware_challenge_request( + from: PeerId, + request: p2p::HardwareChallengeRequest, + context: &Context, +) -> Result { + let authorized_peers = context.authorized_peers.read().await; + if !authorized_peers.contains(&from) { + // TODO: error response variant? 
+ anyhow::bail!("unauthorized peer {from} attempted to access HardwareChallenge request"); + } + + let challenge_response = p2p::calc_matrix(&request.challenge); + let response = p2p::HardwareChallengeResponse { + response: challenge_response, + timestamp: SystemTime::now(), + }; + Ok(response.into()) +} + async fn handle_invite_request( from: PeerId, _request: p2p::Request, @@ -283,10 +307,7 @@ async fn handle_get_task_logs_request(from: PeerId, context: &Context) -> Respon match context.docker_service.get_logs().await { Ok(logs) => p2p::GetTaskLogsResponse::Ok(logs).into(), - Err(e) => { - return p2p::GetTaskLogsResponse::Error(format!("failed to get task logs: {e:?}")) - .into(); - } + Err(e) => p2p::GetTaskLogsResponse::Error(format!("failed to get task logs: {e:?}")).into(), } } @@ -302,7 +323,7 @@ async fn handle_restart_request(from: PeerId, context: &Context) -> Response { } } -async fn handle_incoming_response(response: p2p::Response) -> Result<()> { +fn handle_incoming_response(response: p2p::Response) { // critical developer error if any of these happen, could panic here match response { p2p::Response::ValidatorAuthentication(_) => { @@ -321,5 +342,4 @@ async fn handle_incoming_response(response: p2p::Response) -> Result<()> { tracing::error!("worker should never receive Restart responses"); } } - Ok(()) } From 0f386af0694279df6f9cd36920518aab6c6e40f1 Mon Sep 17 00:00:00 2001 From: elizabeth Date: Tue, 8 Jul 2025 17:51:51 -0400 Subject: [PATCH 08/38] implement invite request handling, finish cli changes --- crates/worker/src/cli/command.rs | 43 ++--- crates/worker/src/operations/compute_node.rs | 9 +- crates/worker/src/p2p/mod.rs | 162 +++++++++++++++++-- crates/worker/src/state/system_state.rs | 4 +- 4 files changed, 162 insertions(+), 56 deletions(-) diff --git a/crates/worker/src/cli/command.rs b/crates/worker/src/cli/command.rs index 5698568e..db28deb2 100644 --- a/crates/worker/src/cli/command.rs +++ b/crates/worker/src/cli/command.rs @@ -60,7 +60,7 
@@ pub enum Commands { /// Compute pool ID #[arg(long)] - compute_pool_id: u64, + compute_pool_id: u32, /// Dry run the command without starting the worker #[arg(long, default_value = "false")] @@ -174,7 +174,7 @@ pub enum Commands { /// Compute pool ID #[arg(long)] - compute_pool_id: u64, + compute_pool_id: u32, }, } @@ -215,7 +215,7 @@ pub async fn execute_command( let state = Arc::new(SystemState::new( state_dir_overwrite.clone(), *disable_state_storing, - Some(compute_pool_id.to_string()), + Some(*compute_pool_id), )); let private_key_provider = if let Some(key) = private_key_provider { @@ -294,7 +294,7 @@ pub async fn execute_command( let discovery_state = state.clone(); let discovery_updater = DiscoveryUpdater::new(discovery_service.clone(), discovery_state.clone()); - let pool_id = U256::from(*compute_pool_id as u32); + let pool_id = U256::from(*compute_pool_id); let pool_info = loop { match contracts.compute_pool.get_pool_info(pool_id).await { @@ -336,7 +336,7 @@ pub async fn execute_command( .address() .to_string(), compute_specs: None, - compute_pool_id: *compute_pool_id as u32, + compute_pool_id: *compute_pool_id, worker_p2p_id: None, worker_p2p_addresses: None, }; @@ -699,15 +699,6 @@ pub async fn execute_command( } }; - // let p2p_context = P2PContext { - // docker_service: docker_service.clone(), - // heartbeat_service: heartbeat.clone(), - // system_state: state.clone(), - // contracts: contracts.clone(), - // node_wallet: node_wallet_instance.clone(), - // provider_wallet: provider_wallet_instance.clone(), - // }; - let validators = match contracts.prime_network.get_validator_role().await { Ok(validators) => validators, Err(e) => { @@ -726,22 +717,6 @@ pub async fn execute_command( let mut allowed_addresses = vec![pool_info.creator, pool_info.compute_manager_key]; allowed_addresses.extend(validators); - // let p2p_service = match P2PService::new( - // state.worker_p2p_seed, - // cancellation_token.clone(), - // Some(p2p_context), - // 
node_wallet_instance.clone(), - // allowed_addresses, - // ) - // .await - // { - // Ok(service) => service, - // Err(e) => { - // error!("❌ Failed to start P2P service: {e}"); - // std::process::exit(1); - // } - // }; - let port = 0; // TODO: cli option let validator_addresses = std::collections::HashSet::from_iter(allowed_addresses); let p2p_service = match crate::p2p::Service::new( @@ -749,6 +724,10 @@ pub async fn execute_command( node_wallet_instance.clone(), validator_addresses, docker_service.clone(), + heartbeat.clone(), + state.clone(), + contracts.clone(), + provider_wallet_instance.clone(), cancellation_token.clone(), ) { Ok(service) => service, @@ -823,7 +802,7 @@ pub async fn execute_command( // Start monitoring compute node status on chain provider_ops.start_monitoring(provider_ops_cancellation); - let pool_id = state.compute_pool_id.clone().unwrap_or("0".to_string()); + let pool_id = state.compute_pool_id.unwrap_or(0); if let Err(err) = compute_node_ops.start_monitoring(cancellation_token.clone(), pool_id) { error!("❌ Failed to start node monitoring: {err}"); @@ -1062,7 +1041,7 @@ pub async fn execute_command( } }; - let pool_id = U256::from(*compute_pool_id as u32); + let pool_id = U256::from(*compute_pool_id); if compute_node_exists { match contracts diff --git a/crates/worker/src/operations/compute_node.rs b/crates/worker/src/operations/compute_node.rs index 39b18c29..7cbdbda2 100644 --- a/crates/worker/src/operations/compute_node.rs +++ b/crates/worker/src/operations/compute_node.rs @@ -32,7 +32,7 @@ impl<'c> ComputeNodeOperations<'c> { pub(crate) fn start_monitoring( &self, cancellation_token: CancellationToken, - pool_id: String, + pool_id: u32, ) -> Result<()> { let provider_address = self.provider_wallet.wallet.default_signer().address(); let node_address = self.node_wallet.wallet.default_signer().address(); @@ -81,9 +81,8 @@ impl<'c> ComputeNodeOperations<'c> { } // Check rewards for the current compute pool - if let Ok(pool_id_u32) = 
pool_id.parse::() { match contracts.compute_pool.calculate_node_rewards( - U256::from(pool_id_u32), + U256::from(pool_id), node_address, ).await { Ok((claimable, locked)) => { @@ -96,9 +95,9 @@ impl<'c> ComputeNodeOperations<'c> { } } Err(e) => { - log::debug!("Failed to check rewards for pool {pool_id_u32}: {e}"); + log::debug!("Failed to check rewards for pool {pool_id}: {e}"); } - } + } first_check = false; diff --git a/crates/worker/src/p2p/mod.rs b/crates/worker/src/p2p/mod.rs index 78b8927c..3c79b1b6 100644 --- a/crates/worker/src/p2p/mod.rs +++ b/crates/worker/src/p2p/mod.rs @@ -1,10 +1,12 @@ use anyhow::Context as _; use anyhow::Result; +use p2p::InviteRequestUrl; use p2p::Node; use p2p::NodeBuilder; use p2p::PeerId; use p2p::Response; use p2p::{IncomingMessage, Libp2pIncomingMessage, OutgoingMessage}; +use shared::web3::contracts::core::builder::Contracts; use shared::web3::wallet::Wallet; use std::collections::HashMap; use std::collections::HashSet; @@ -15,6 +17,9 @@ use tokio::sync::RwLock; use tokio_util::sync::CancellationToken; use crate::docker::DockerService; +use crate::operations::heartbeat::service::HeartbeatService; +use crate::state::system_state::SystemState; +use shared::web3::wallet::WalletProvider; pub(crate) struct Service { node: Node, @@ -24,11 +29,16 @@ pub(crate) struct Service { } impl Service { + #[allow(clippy::too_many_arguments)] pub(crate) fn new( port: u16, wallet: Wallet, validator_addresses: HashSet, docker_service: Arc, + heartbeat_service: Arc, + system_state: Arc, + contracts: Contracts, + provider_wallet: Wallet, cancellation_token: CancellationToken, ) -> Result { let (node, incoming_messages, outgoing_messages) = @@ -42,6 +52,10 @@ impl Service { outgoing_messages, validator_addresses, docker_service, + heartbeat_service, + system_state, + contracts, + provider_wallet, ), }) } @@ -97,20 +111,35 @@ fn build_p2p_node( #[derive(Clone)] struct Context { authorized_peers: Arc>>, + wallet: Wallet, + validator_addresses: Arc>, 
+ + // for validator authentication requests ongoing_auth_challenges: Arc>>, // use request_id? nonce_cache: Arc>>, - wallet: Wallet, outgoing_messages: Sender, - validator_addresses: Arc>, + + // for get_task_logs and restart requests docker_service: Arc, + + // for invite requests + heartbeat_service: Arc, + system_state: Arc, + contracts: Contracts, + provider_wallet: Wallet, } impl Context { + #[allow(clippy::too_many_arguments)] fn new( wallet: Wallet, outgoing_messages: Sender, validator_addresses: HashSet, docker_service: Arc, + heartbeat_service: Arc, + system_state: Arc, + contracts: Contracts, + provider_wallet: Wallet, ) -> Self { Self { authorized_peers: Arc::new(RwLock::new(HashSet::new())), @@ -120,6 +149,10 @@ impl Context { outgoing_messages, validator_addresses: Arc::new(validator_addresses), docker_service, + heartbeat_service, + system_state, + contracts, + provider_wallet, } } } @@ -181,9 +214,12 @@ async fn handle_incoming_request( .await .context("failed to handle HardwareChallenge request")? 
} - p2p::Request::Invite(_) => { + p2p::Request::Invite(req) => { tracing::debug!("handling Invite request"); - handle_invite_request(from, request, &context).await + match handle_invite_request(from, req, &context).await { + Ok(()) => p2p::InviteResponse::Ok.into(), + Err(e) => p2p::InviteResponse::Error(e.to_string()).into(), + } } p2p::Request::GetTaskLogs => { tracing::debug!("handling GetTaskLogs request"); @@ -286,19 +322,6 @@ async fn handle_hardware_challenge_request( Ok(response.into()) } -async fn handle_invite_request( - from: PeerId, - _request: p2p::Request, - context: &Context, -) -> Response { - let authorized_peers = context.authorized_peers.read().await; - if !authorized_peers.contains(&from) { - return p2p::InviteResponse::Error("unauthorized".to_string()).into(); - } - - p2p::InviteResponse::Ok.into() -} - async fn handle_get_task_logs_request(from: PeerId, context: &Context) -> Response { let authorized_peers = context.authorized_peers.read().await; if !authorized_peers.contains(&from) { @@ -343,3 +366,108 @@ fn handle_incoming_response(response: p2p::Response) { } } } + +async fn handle_invite_request( + from: PeerId, + req: p2p::InviteRequest, + context: &Context, +) -> Result<()> { + use crate::console::Console; + use shared::web3::contracts::helpers::utils::retry_call; + use shared::web3::contracts::structs::compute_pool::PoolStatus; + + let authorized_peers = context.authorized_peers.read().await; + if !authorized_peers.contains(&from) { + return Err(anyhow::anyhow!( + "unauthorized peer {from} attempted to send invite" + )); + } + + if context.system_state.is_running().await { + anyhow::bail!("heartbeat is currently running and in a compute pool"); + } + + if let Some(pool_id) = context.system_state.compute_pool_id { + if req.pool_id != pool_id { + anyhow::bail!( + "pool ID mismatch: expected {}, got {}", + pool_id, + req.pool_id + ); + } + } + + let invite_bytes = hex::decode(&req.invite).context("failed to decode invite hex")?; + + if 
invite_bytes.len() < 65 { + anyhow::bail!("invite data is too short, expected at least 65 bytes"); + } + + let contracts = &context.contracts; + let pool_id = alloy::primitives::U256::from(req.pool_id); + + let bytes_array: [u8; 65] = match invite_bytes[..65].try_into() { + Ok(array) => array, + Err(_) => { + anyhow::bail!("failed to convert invite bytes to 65 byte array"); + } + }; + + let provider_address = context.provider_wallet.wallet.default_signer().address(); + + let pool_info = match contracts.compute_pool.get_pool_info(pool_id).await { + Ok(info) => info, + Err(err) => { + anyhow::bail!("failed to get pool info: {err:?}"); + } + }; + + if let PoolStatus::PENDING = pool_info.status { + anyhow::bail!("invalid invite; pool is pending"); + } + + let node_address = vec![context.wallet.wallet.default_signer().address()]; + let signatures = vec![alloy::primitives::FixedBytes::from(&bytes_array)]; + let call = contracts + .compute_pool + .build_join_compute_pool_call( + pool_id, + provider_address, + node_address, + vec![req.nonce], + vec![req.expiration], + signatures, + ) + .map_err(|e| anyhow::anyhow!("failed to build join compute pool call: {e:?}"))?; + + let provider = &context.provider_wallet.provider; + match retry_call(call, 3, provider.clone(), None).await { + Ok(result) => { + Console::section("WORKER JOINED COMPUTE POOL"); + Console::success(&format!( + "Successfully registered on chain with tx: {result}" + )); + Console::info( + "Status", + "Worker is now part of the compute pool and ready to receive tasks", + ); + } + Err(err) => { + anyhow::bail!("failed to join compute pool: {err:?}"); + } + } + + let heartbeat_endpoint = match req.url { + InviteRequestUrl::MasterIpPort(ip, port) => { + format!("http://{ip}:{port}/heartbeat") + } + InviteRequestUrl::MasterUrl(url) => format!("{url}/heartbeat"), + }; + + context + .heartbeat_service + .start(heartbeat_endpoint) + .await + .context("failed to start heartbeat service")?; + Ok(()) +} diff --git 
a/crates/worker/src/state/system_state.rs b/crates/worker/src/state/system_state.rs index fd8f0a3a..e419c870 100644 --- a/crates/worker/src/state/system_state.rs +++ b/crates/worker/src/state/system_state.rs @@ -34,7 +34,7 @@ pub(crate) struct SystemState { endpoint: Arc>>, state_dir_overwrite: Option, disable_state_storing: bool, - pub compute_pool_id: Option, + pub compute_pool_id: Option, pub worker_p2p_seed: Option, pub p2p_id: Option, @@ -45,7 +45,7 @@ impl SystemState { pub(crate) fn new( state_dir: Option, disable_state_storing: bool, - compute_pool_id: Option, + compute_pool_id: Option, ) -> Self { let default_state_dir = get_default_state_dir(); debug!("Default state dir: {default_state_dir:?}"); From 7bd100916bf65ca251cea390b38e954d67966de0 Mon Sep 17 00:00:00 2001 From: elizabeth Date: Wed, 9 Jul 2025 12:05:29 -0400 Subject: [PATCH 09/38] add full hardware challenge message --- Cargo.lock | 2 + crates/p2p/Cargo.toml | 2 + crates/p2p/src/challenge_message.rs | 89 +++++++++++++++++++++++++++++ crates/p2p/src/lib.rs | 45 +++++++++++---- crates/p2p/src/message.rs | 57 +++++++++++++++--- 5 files changed, 177 insertions(+), 18 deletions(-) create mode 100644 crates/p2p/src/challenge_message.rs diff --git a/Cargo.lock b/Cargo.lock index a64a46e4..ae652ad4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6263,8 +6263,10 @@ version = "0.3.11" dependencies = [ "anyhow", "libp2p", + "nalgebra", "serde", "tokio", + "tokio-util", "void", ] diff --git a/crates/p2p/Cargo.toml b/crates/p2p/Cargo.toml index 2d5d94ff..ba52d570 100644 --- a/crates/p2p/Cargo.toml +++ b/crates/p2p/Cargo.toml @@ -8,8 +8,10 @@ libp2p = { version = "0.54", features = ["request-response", "identify", "ping", void = "1.0" anyhow = {workspace = true} +nalgebra = {workspace = true} serde = {workspace = true} tokio = {workspace = true, features = ["sync"]} +tokio-util = { workspace = true, features = ["rt"] } [lints] workspace = true diff --git a/crates/p2p/src/challenge_message.rs 
b/crates/p2p/src/challenge_message.rs new file mode 100644 index 00000000..639cc602 --- /dev/null +++ b/crates/p2p/src/challenge_message.rs @@ -0,0 +1,89 @@ +use nalgebra::DMatrix; +use serde::{ + de::{self, Visitor}, + Deserialize, Deserializer, Serialize, Serializer, +}; +use std::fmt; + +#[derive(Debug, Clone)] +pub struct FixedF64(pub f64); + +impl Serialize for FixedF64 { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + // adjust precision as needed + serializer.serialize_str(&format!("{:.12}", self.0)) + } +} + +impl<'de> Deserialize<'de> for FixedF64 { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + struct FixedF64Visitor; + + impl Visitor<'_> for FixedF64Visitor { + type Value = FixedF64; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("a string representing a fixed precision float") + } + + fn visit_str(self, value: &str) -> Result + where + E: de::Error, + { + value + .parse::() + .map(FixedF64) + .map_err(|_| E::custom(format!("invalid f64: {value}"))) + } + } + + deserializer.deserialize_str(FixedF64Visitor) + } +} + +impl PartialEq for FixedF64 { + fn eq(&self, other: &Self) -> bool { + format!("{:.10}", self.0) == format!("{:.10}", other.0) + } +} + +#[derive(Deserialize, Serialize, Debug, Clone, PartialEq)] +pub struct ChallengeRequest { + pub rows_a: usize, + pub cols_a: usize, + pub data_a: Vec, + pub rows_b: usize, + pub cols_b: usize, + pub data_b: Vec, + pub timestamp: Option, +} + +#[derive(Deserialize, Serialize, Debug, Clone, PartialEq)] +pub struct ChallengeResponse { + pub result: Vec, + pub rows: usize, + pub cols: usize, +} + +pub fn calc_matrix(req: &ChallengeRequest) -> ChallengeResponse { + // convert FixedF64 to f64 + let data_a: Vec = req.data_a.iter().map(|x| x.0).collect(); + let data_b: Vec = req.data_b.iter().map(|x| x.0).collect(); + let a = DMatrix::from_vec(req.rows_a, req.cols_a, data_a); + let b = 
DMatrix::from_vec(req.rows_b, req.cols_b, data_b); + let c = a * b; + + let data_c: Vec = c.iter().map(|x| FixedF64(*x)).collect(); + + ChallengeResponse { + rows: c.nrows(), + cols: c.ncols(), + result: data_c, + } +} diff --git a/crates/p2p/src/lib.rs b/crates/p2p/src/lib.rs index 9f07d8d0..46105a36 100644 --- a/crates/p2p/src/lib.rs +++ b/crates/p2p/src/lib.rs @@ -5,20 +5,28 @@ use libp2p::noise; use libp2p::swarm::SwarmEvent; use libp2p::tcp; use libp2p::yamux; -use libp2p::Multiaddr; use libp2p::Swarm; use libp2p::SwarmBuilder; -use libp2p::{identity, PeerId, Transport}; +use libp2p::{identity, Transport}; use std::time::Duration; mod behaviour; +mod challenge_message; mod message; mod protocol; use behaviour::Behaviour; -use message::{IncomingMessage, OutgoingMessage}; use protocol::Protocols; +// TODO: put these in a mod +pub use challenge_message::*; +pub use message::*; + +pub type Libp2pIncomingMessage = libp2p::request_response::Message; +pub type ResponseChannel = libp2p::request_response::ResponseChannel; +pub type PeerId = libp2p::PeerId; +pub type Multiaddr = libp2p::Multiaddr; + pub const PRIME_STREAM_PROTOCOL: libp2p::StreamProtocol = libp2p::StreamProtocol::new("/prime/1.0.0"); // TODO: force this to be passed by the user @@ -29,6 +37,7 @@ pub struct Node { listen_addrs: Vec, swarm: Swarm, bootnodes: Vec, + cancellation_token: tokio_util::sync::CancellationToken, // channel for sending incoming messages to the consumer of this library incoming_message_tx: tokio::sync::mpsc::Sender, @@ -66,6 +75,7 @@ impl Node { listen_addrs, mut swarm, bootnodes, + cancellation_token, incoming_message_tx, mut outgoing_message_rx, } = self; @@ -93,6 +103,10 @@ impl Node { loop { tokio::select! 
{ + _ = cancellation_token.cancelled() => { + println!("cancellation token triggered, shutting down node"); + break Ok(()); + } Some(message) = outgoing_message_rx.recv() => { match message { OutgoingMessage::Request((peer, request)) => { @@ -143,6 +157,7 @@ pub struct NodeBuilder { agent_version: Option, protocols: Protocols, bootnodes: Vec, + cancellation_token: Option, } impl Default for NodeBuilder { @@ -160,6 +175,7 @@ impl NodeBuilder { agent_version: None, protocols: Protocols::new(), bootnodes: Vec::new(), + cancellation_token: None, } } @@ -224,6 +240,14 @@ impl NodeBuilder { self } + pub fn with_cancellation_token( + mut self, + cancellation_token: tokio_util::sync::CancellationToken, + ) -> Self { + self.cancellation_token = Some(cancellation_token); + self + } + pub fn try_build( self, ) -> Result<( @@ -238,6 +262,7 @@ impl NodeBuilder { agent_version, protocols, bootnodes, + cancellation_token, } = self; let keypair = keypair.unwrap_or(identity::Keypair::generate_ed25519()); @@ -279,6 +304,7 @@ impl NodeBuilder { bootnodes, incoming_message_tx, outgoing_message_rx, + cancellation_token: cancellation_token.unwrap_or_default(), }, incoming_message_rx, outgoing_message_tx, @@ -343,9 +369,8 @@ mod test { println!("received request from node1"); // send response from node2->node1 - let response = message::Response::GetTaskLogs(message::GetTaskLogsResponse { - logs: Ok(vec!["log1".to_string(), "log2".to_string()]), - }); + let response = + message::Response::GetTaskLogs(message::GetTaskLogsResponse::Ok("logs".to_string())); outgoing_message_tx2 .send(response.into_outgoing_message(channel)) .await @@ -359,9 +384,9 @@ mod test { else { panic!("expected a GetTaskLogs response message"); }; - assert_eq!( - response.logs, - Ok(vec!["log1".to_string(), "log2".to_string()]) - ); + let message::GetTaskLogsResponse::Ok(logs) = response else { + panic!("expected a successful GetTaskLogs response"); + }; + assert_eq!(logs, "logs"); } } diff --git 
a/crates/p2p/src/message.rs b/crates/p2p/src/message.rs index 9013a8ca..c0fd2d66 100644 --- a/crates/p2p/src/message.rs +++ b/crates/p2p/src/message.rs @@ -2,6 +2,9 @@ use libp2p::PeerId; use serde::{Deserialize, Serialize}; use std::time::SystemTime; +use crate::ChallengeRequest; +use crate::ChallengeResponse; + #[derive(Debug)] pub struct IncomingMessage { pub peer: PeerId, @@ -72,13 +75,19 @@ pub struct ValidatorAuthenticationInitiationRequest { #[derive(Debug, Clone, Serialize, Deserialize)] pub struct ValidatorAuthenticationInitiationResponse { - pub signed_message: String, + pub signature: String, pub message: String, } +impl From for Response { + fn from(response: ValidatorAuthenticationInitiationResponse) -> Self { + Response::ValidatorAuthentication(ValidatorAuthenticationResponse::Initiation(response)) + } +} + #[derive(Debug, Clone, Serialize, Deserialize)] pub struct ValidatorAuthenticationSolutionRequest { - pub signed_message: String, + pub signature: String, } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -87,18 +96,30 @@ pub enum ValidatorAuthenticationSolutionResponse { Rejected, } +impl From for Response { + fn from(response: ValidatorAuthenticationSolutionResponse) -> Self { + Response::ValidatorAuthentication(ValidatorAuthenticationResponse::Solution(response)) + } +} + #[derive(Debug, Clone, Serialize, Deserialize)] pub struct HardwareChallengeRequest { - pub challenge: String, // TODO + pub challenge: ChallengeRequest, pub timestamp: SystemTime, } #[derive(Debug, Clone, Serialize, Deserialize)] pub struct HardwareChallengeResponse { - pub response: String, // TODO + pub response: ChallengeResponse, pub timestamp: SystemTime, } +impl From for Response { + fn from(response: HardwareChallengeResponse) -> Self { + Response::HardwareChallenge(response) + } +} + #[derive(Debug, Clone, Serialize, Deserialize)] pub enum InviteRequestUrl { MasterUrl(String), @@ -121,12 +142,32 @@ pub enum InviteResponse { Error(String), } +impl From for Response { 
+ fn from(response: InviteResponse) -> Self { + Response::Invite(response) + } +} + #[derive(Debug, Clone, Serialize, Deserialize)] -pub struct GetTaskLogsResponse { - pub logs: Result, String>, +pub enum GetTaskLogsResponse { + Ok(String), + Error(String), +} + +impl From for Response { + fn from(response: GetTaskLogsResponse) -> Self { + Response::GetTaskLogs(response) + } } #[derive(Debug, Clone, Serialize, Deserialize)] -pub struct RestartResponse { - pub result: Result<(), String>, +pub enum RestartResponse { + Ok, + Error(String), +} + +impl From for Response { + fn from(response: RestartResponse) -> Self { + Response::Restart(response) + } } From d6c1a4af1d0dd7a3d236ab23d032f78ec336ef09 Mon Sep 17 00:00:00 2001 From: elizabeth Date: Wed, 9 Jul 2025 12:07:13 -0400 Subject: [PATCH 10/38] move messages to their own dir --- crates/p2p/src/lib.rs | 3 --- .../{challenge_message.rs => message/hardware_challenge.rs} | 0 crates/p2p/src/{message.rs => message/mod.rs} | 5 +++-- 3 files changed, 3 insertions(+), 5 deletions(-) rename crates/p2p/src/{challenge_message.rs => message/hardware_challenge.rs} (100%) rename crates/p2p/src/{message.rs => message/mod.rs} (98%) diff --git a/crates/p2p/src/lib.rs b/crates/p2p/src/lib.rs index 46105a36..208fb597 100644 --- a/crates/p2p/src/lib.rs +++ b/crates/p2p/src/lib.rs @@ -11,15 +11,12 @@ use libp2p::{identity, Transport}; use std::time::Duration; mod behaviour; -mod challenge_message; mod message; mod protocol; use behaviour::Behaviour; use protocol::Protocols; -// TODO: put these in a mod -pub use challenge_message::*; pub use message::*; pub type Libp2pIncomingMessage = libp2p::request_response::Message; diff --git a/crates/p2p/src/challenge_message.rs b/crates/p2p/src/message/hardware_challenge.rs similarity index 100% rename from crates/p2p/src/challenge_message.rs rename to crates/p2p/src/message/hardware_challenge.rs diff --git a/crates/p2p/src/message.rs b/crates/p2p/src/message/mod.rs similarity index 98% rename from 
crates/p2p/src/message.rs rename to crates/p2p/src/message/mod.rs index c0fd2d66..64486533 100644 --- a/crates/p2p/src/message.rs +++ b/crates/p2p/src/message/mod.rs @@ -2,8 +2,9 @@ use libp2p::PeerId; use serde::{Deserialize, Serialize}; use std::time::SystemTime; -use crate::ChallengeRequest; -use crate::ChallengeResponse; +mod hardware_challenge; + +pub use hardware_challenge::*; #[derive(Debug)] pub struct IncomingMessage { From ea46820b8a5dae8878e32726e80a65b8fee66911 Mon Sep 17 00:00:00 2001 From: elizabeth Date: Wed, 9 Jul 2025 12:16:19 -0400 Subject: [PATCH 11/38] add general request-response protocol --- crates/p2p/src/lib.rs | 5 +++ crates/p2p/src/message/mod.rs | 62 ++++++++++++++++++++++++++++++++++- crates/p2p/src/protocol.rs | 8 +++++ 3 files changed, 74 insertions(+), 1 deletion(-) diff --git a/crates/p2p/src/lib.rs b/crates/p2p/src/lib.rs index 208fb597..6e2efca3 100644 --- a/crates/p2p/src/lib.rs +++ b/crates/p2p/src/lib.rs @@ -221,6 +221,11 @@ impl NodeBuilder { self } + pub fn with_general(mut self) -> Self { + self.protocols = self.protocols.with_general(); + self + } + pub fn with_bootnode(mut self, bootnode: Multiaddr) -> Self { self.bootnodes.push(bootnode); self diff --git a/crates/p2p/src/message/mod.rs b/crates/p2p/src/message/mod.rs index 64486533..adff99ac 100644 --- a/crates/p2p/src/message/mod.rs +++ b/crates/p2p/src/message/mod.rs @@ -4,7 +4,7 @@ use std::time::SystemTime; mod hardware_challenge; -pub use hardware_challenge::*; +pub use hardware_challenge::*; #[derive(Debug)] pub struct IncomingMessage { @@ -31,6 +31,7 @@ pub enum Request { Invite(InviteRequest), GetTaskLogs, Restart, + General(GeneralRequest), } impl Request { @@ -46,6 +47,7 @@ pub enum Response { Invite(InviteResponse), GetTaskLogs(GetTaskLogsResponse), Restart(RestartResponse), + General(GeneralResponse), } impl Response { @@ -63,17 +65,35 @@ pub enum ValidatorAuthenticationRequest { Solution(ValidatorAuthenticationSolutionRequest), } +impl From for Request { + fn 
from(request: ValidatorAuthenticationRequest) -> Self { + Request::ValidatorAuthentication(request) + } +} + #[derive(Debug, Clone, Serialize, Deserialize)] pub enum ValidatorAuthenticationResponse { Initiation(ValidatorAuthenticationInitiationResponse), Solution(ValidatorAuthenticationSolutionResponse), } +impl From for Response { + fn from(response: ValidatorAuthenticationResponse) -> Self { + Response::ValidatorAuthentication(response) + } +} + #[derive(Debug, Clone, Serialize, Deserialize)] pub struct ValidatorAuthenticationInitiationRequest { pub message: String, } +impl From for Request { + fn from(request: ValidatorAuthenticationInitiationRequest) -> Self { + Request::ValidatorAuthentication(ValidatorAuthenticationRequest::Initiation(request)) + } +} + #[derive(Debug, Clone, Serialize, Deserialize)] pub struct ValidatorAuthenticationInitiationResponse { pub signature: String, @@ -91,6 +111,12 @@ pub struct ValidatorAuthenticationSolutionRequest { pub signature: String, } +impl From for Request { + fn from(request: ValidatorAuthenticationSolutionRequest) -> Self { + Request::ValidatorAuthentication(ValidatorAuthenticationRequest::Solution(request)) + } +} + #[derive(Debug, Clone, Serialize, Deserialize)] pub enum ValidatorAuthenticationSolutionResponse { Granted, @@ -109,6 +135,12 @@ pub struct HardwareChallengeRequest { pub timestamp: SystemTime, } +impl From for Request { + fn from(request: HardwareChallengeRequest) -> Self { + Request::HardwareChallenge(request) + } +} + #[derive(Debug, Clone, Serialize, Deserialize)] pub struct HardwareChallengeResponse { pub response: ChallengeResponse, @@ -137,6 +169,12 @@ pub struct InviteRequest { pub nonce: [u8; 32], } +impl From for Request { + fn from(request: InviteRequest) -> Self { + Request::Invite(request) + } +} + #[derive(Debug, Clone, Serialize, Deserialize)] pub enum InviteResponse { Ok, @@ -172,3 +210,25 @@ impl From for Response { Response::Restart(response) } } + +#[derive(Debug, Clone, Serialize, 
Deserialize)] +pub struct GeneralRequest { + data: Vec, +} + +impl From for Request { + fn from(request: GeneralRequest) -> Self { + Request::General(request) + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct GeneralResponse { + data: Vec, +} + +impl From for Response { + fn from(response: GeneralResponse) -> Self { + Response::General(response) + } +} diff --git a/crates/p2p/src/protocol.rs b/crates/p2p/src/protocol.rs index 5186ac44..df423ef8 100644 --- a/crates/p2p/src/protocol.rs +++ b/crates/p2p/src/protocol.rs @@ -13,6 +13,8 @@ pub(crate) enum Protocol { GetTaskLogs, // any -> worker Restart, + // any -> any + General, } impl Protocol { @@ -25,6 +27,7 @@ impl Protocol { Protocol::Invite => StreamProtocol::new("/prime/invite/1.0.0"), Protocol::GetTaskLogs => StreamProtocol::new("/prime/get_task_logs/1.0.0"), Protocol::Restart => StreamProtocol::new("/prime/restart/1.0.0"), + Protocol::General => StreamProtocol::new("/prime/general/1.0.0"), } } } @@ -61,6 +64,11 @@ impl Protocols { self.0.insert(Protocol::Restart); self } + + pub(crate) fn with_general(mut self) -> Self { + self.0.insert(Protocol::General); + self + } } impl IntoIterator for Protocols { From 7288261a1a55f65d6463d6b5441f0e8e398106fb Mon Sep 17 00:00:00 2001 From: elizabeth Date: Wed, 9 Jul 2025 13:36:33 -0400 Subject: [PATCH 12/38] update SystemState to store libp2p keypair --- Cargo.lock | 1 - crates/p2p/src/lib.rs | 1 + crates/worker/Cargo.toml | 1 - crates/worker/src/cli/command.rs | 44 +- crates/worker/src/docker/service.rs | 17 +- crates/worker/src/docker/taskbridge/bridge.rs | 10 +- crates/worker/src/operations/compute_node.rs | 19 - .../src/operations/heartbeat/service.rs | 6 +- crates/worker/src/p2p/mod.rs | 20 +- crates/worker/src/p2p/service.rs | 736 ------------------ crates/worker/src/state/system_state.rs | 137 ++-- crates/worker/src/utils/mod.rs | 1 - crates/worker/src/utils/p2p.rs | 60 -- 13 files changed, 117 insertions(+), 936 deletions(-) delete mode 100644 
crates/worker/src/p2p/service.rs delete mode 100644 crates/worker/src/utils/p2p.rs diff --git a/Cargo.lock b/Cargo.lock index 200a21e8..9964cd8d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -10340,7 +10340,6 @@ dependencies = [ "hex", "homedir", "indicatif", - "iroh", "lazy_static", "libc", "log", diff --git a/crates/p2p/src/lib.rs b/crates/p2p/src/lib.rs index 6e2efca3..b9f1ac48 100644 --- a/crates/p2p/src/lib.rs +++ b/crates/p2p/src/lib.rs @@ -23,6 +23,7 @@ pub type Libp2pIncomingMessage = libp2p::request_response::Message; pub type PeerId = libp2p::PeerId; pub type Multiaddr = libp2p::Multiaddr; +pub type Keypair = libp2p::identity::Keypair; pub const PRIME_STREAM_PROTOCOL: libp2p::StreamProtocol = libp2p::StreamProtocol::new("/prime/1.0.0"); diff --git a/crates/worker/Cargo.toml b/crates/worker/Cargo.toml index 3ee03e12..43fc4a53 100644 --- a/crates/worker/Cargo.toml +++ b/crates/worker/Cargo.toml @@ -56,7 +56,6 @@ tracing = "0.1.41" tracing-subscriber = { version = "0.3.19", features = ["env-filter"] } tracing-log = "0.2.0" time = "0.3.41" -iroh = { workspace = true } rand_v8 = { workspace = true } rand_core_v6 = { workspace = true } dashmap = "6.1.0" diff --git a/crates/worker/src/cli/command.rs b/crates/worker/src/cli/command.rs index db28deb2..b5a56bdd 100644 --- a/crates/worker/src/cli/command.rs +++ b/crates/worker/src/cli/command.rs @@ -215,7 +215,7 @@ pub async fn execute_command( let state = Arc::new(SystemState::new( state_dir_overwrite.clone(), *disable_state_storing, - Some(*compute_pool_id), + *compute_pool_id, )); let private_key_provider = if let Some(key) = private_key_provider { @@ -513,7 +513,6 @@ pub async fn execute_command( .default_signer() .address() .to_string(), - state.get_p2p_seed(), *disable_host_network_mode, )); @@ -720,6 +719,7 @@ pub async fn execute_command( let port = 0; // TODO: cli option let validator_addresses = std::collections::HashSet::from_iter(allowed_addresses); let p2p_service = match crate::p2p::Service::new( + 
state.get_p2p_keypair().clone(), port, node_wallet_instance.clone(), validator_addresses, @@ -802,7 +802,7 @@ pub async fn execute_command( // Start monitoring compute node status on chain provider_ops.start_monitoring(provider_ops_cancellation); - let pool_id = state.compute_pool_id.unwrap_or(0); + let pool_id = state.get_compute_pool_id(); if let Err(err) = compute_node_ops.start_monitoring(cancellation_token.clone(), pool_id) { error!("❌ Failed to start node monitoring: {err}"); @@ -1009,7 +1009,7 @@ pub async fn execute_command( std::process::exit(1); } }; - let state = Arc::new(SystemState::new(None, true, None)); + /* Initialize dependencies - services, contracts, operations */ @@ -1023,18 +1023,18 @@ pub async fn execute_command( .build() .unwrap(); - let compute_node_ops = ComputeNodeOperations::new( - &provider_wallet_instance, - &node_wallet_instance, - contracts.clone(), - state.clone(), - ); + let provider_address = provider_wallet_instance.wallet.default_signer().address(); + let node_address = node_wallet_instance.wallet.default_signer().address(); let provider_ops = ProviderOperations::new(provider_wallet_instance.clone(), contracts.clone(), false); - let compute_node_exists = match compute_node_ops.check_compute_node_exists().await { - Ok(exists) => exists, + let compute_node_exists = match contracts + .compute_registry + .get_node(provider_address, node_address) + .await + { + Ok(_) => true, Err(e) => { Console::user_error(&format!("❌ Failed to check if compute node exists: {e}")); std::process::exit(1); @@ -1061,7 +1061,7 @@ pub async fn execute_command( std::process::exit(1); } } - match compute_node_ops.remove_compute_node().await { + match remove_compute_node(contracts, provider_address, node_address).await { Ok(_removed_node) => { Console::success("Compute node removed"); match provider_ops.reclaim_stake(U256::from(0)).await { @@ -1087,3 +1087,21 @@ pub async fn execute_command( } } } + +use alloy::primitives::Address; +use 
shared::web3::contracts::core::builder::Contracts; +use shared::web3::wallet::WalletProvider; + +async fn remove_compute_node( + contracts: Contracts, + provider_address: Address, + node_address: Address, +) -> Result> { + Console::title("🔄 Removing compute node"); + let remove_node_tx = contracts + .prime_network + .remove_compute_node(provider_address, node_address) + .await?; + Console::success(&format!("Remove node tx: {remove_node_tx:?}")); + Ok(true) +} diff --git a/crates/worker/src/docker/service.rs b/crates/worker/src/docker/service.rs index 63425e2d..da15b88e 100644 --- a/crates/worker/src/docker/service.rs +++ b/crates/worker/src/docker/service.rs @@ -24,7 +24,6 @@ pub(crate) struct DockerService { system_memory_mb: Option, task_bridge_socket_path: String, node_address: String, - p2p_seed: Option, } const TASK_PREFIX: &str = "prime-task"; @@ -39,7 +38,6 @@ impl DockerService { task_bridge_socket_path: String, storage_path: String, node_address: String, - p2p_seed: Option, disable_host_network_mode: bool, ) -> Self { let docker_manager = @@ -52,7 +50,6 @@ impl DockerService { system_memory_mb, task_bridge_socket_path, node_address, - p2p_seed, } } @@ -177,7 +174,6 @@ impl DockerService { let system_memory_mb = self.system_memory_mb; let task_bridge_socket_path = self.task_bridge_socket_path.clone(); let node_address = self.node_address.clone(); - let p2p_seed = self.p2p_seed; let handle = tokio::spawn(async move { let Some(payload) = state_clone.get_current_task().await else { return; @@ -185,11 +181,7 @@ impl DockerService { let cmd = match payload.cmd { Some(cmd_vec) => { cmd_vec.into_iter().map(|arg| { - let mut processed_arg = arg.replace("${SOCKET_PATH}", &task_bridge_socket_path); - if let Some(seed) = p2p_seed { - processed_arg = processed_arg.replace("${WORKER_P2P_SEED}", &seed.to_string()); - } - processed_arg + arg.replace("${SOCKET_PATH}", &task_bridge_socket_path) }).collect() } None => vec!["sleep".to_string(), "infinity".to_string()], @@ 
-199,10 +191,7 @@ impl DockerService { if let Some(env) = &payload.env_vars { // Clone env vars and replace ${SOCKET_PATH} in values for (key, value) in env.iter() { - let mut processed_value = value.replace("${SOCKET_PATH}", &task_bridge_socket_path); - if let Some(seed) = p2p_seed { - processed_value = processed_value.replace("${WORKER_P2P_SEED}", &seed.to_string()); - } + let processed_value = value.replace("${SOCKET_PATH}", &task_bridge_socket_path); env_vars.insert(key.clone(), processed_value); } } @@ -432,7 +421,6 @@ mod tests { "/tmp/com.prime.miner/metrics.sock".to_string(), "/tmp/test-storage".to_string(), Address::ZERO.to_string(), - None, false, ); let task = Task { @@ -481,7 +469,6 @@ mod tests { test_socket_path.to_string(), "/tmp/test-storage".to_string(), Address::ZERO.to_string(), - Some(12345), // p2p_seed for testing false, ); diff --git a/crates/worker/src/docker/taskbridge/bridge.rs b/crates/worker/src/docker/taskbridge/bridge.rs index 65a28f76..80b8aee7 100644 --- a/crates/worker/src/docker/taskbridge/bridge.rs +++ b/crates/worker/src/docker/taskbridge/bridge.rs @@ -473,7 +473,7 @@ mod tests { let temp_dir = tempdir()?; let socket_path = temp_dir.path().join("test.sock"); let metrics_store = Arc::new(MetricsStore::new()); - let state = Arc::new(SystemState::new(None, false, None)); + let state = Arc::new(SystemState::new(None, false, 0)); let bridge = TaskBridge::new( Some(socket_path.to_str().unwrap()), metrics_store.clone(), @@ -506,7 +506,7 @@ mod tests { let temp_dir = tempdir()?; let socket_path = temp_dir.path().join("test.sock"); let metrics_store = Arc::new(MetricsStore::new()); - let state = Arc::new(SystemState::new(None, false, None)); + let state = Arc::new(SystemState::new(None, false, 0)); let bridge = TaskBridge::new( Some(socket_path.to_str().unwrap()), metrics_store.clone(), @@ -541,7 +541,7 @@ mod tests { let temp_dir = tempdir()?; let socket_path = temp_dir.path().join("test.sock"); let metrics_store = 
Arc::new(MetricsStore::new()); - let state = Arc::new(SystemState::new(None, false, None)); + let state = Arc::new(SystemState::new(None, false, 0)); let bridge = TaskBridge::new( Some(socket_path.to_str().unwrap()), metrics_store.clone(), @@ -590,7 +590,7 @@ mod tests { let temp_dir = tempdir()?; let socket_path = temp_dir.path().join("test.sock"); let metrics_store = Arc::new(MetricsStore::new()); - let state = Arc::new(SystemState::new(None, false, None)); + let state = Arc::new(SystemState::new(None, false, 0)); let bridge = TaskBridge::new( Some(socket_path.to_str().unwrap()), metrics_store.clone(), @@ -639,7 +639,7 @@ mod tests { let temp_dir = tempdir()?; let socket_path = temp_dir.path().join("test.sock"); let metrics_store = Arc::new(MetricsStore::new()); - let state = Arc::new(SystemState::new(None, false, None)); + let state = Arc::new(SystemState::new(None, false, 0)); let bridge = TaskBridge::new( Some(socket_path.to_str().unwrap()), metrics_store.clone(), diff --git a/crates/worker/src/operations/compute_node.rs b/crates/worker/src/operations/compute_node.rs index 7cbdbda2..00f147a7 100644 --- a/crates/worker/src/operations/compute_node.rs +++ b/crates/worker/src/operations/compute_node.rs @@ -164,23 +164,4 @@ impl<'c> ComputeNodeOperations<'c> { Console::success(&format!("Add node tx: {add_node_tx:?}")); Ok(true) } - - pub(crate) async fn remove_compute_node(&self) -> Result> { - Console::title("🔄 Removing compute node"); - - if !self.check_compute_node_exists().await? 
{ - return Ok(false); - } - - Console::progress("Removing compute node"); - let provider_address = self.provider_wallet.wallet.default_signer().address(); - let node_address = self.node_wallet.wallet.default_signer().address(); - let remove_node_tx = self - .contracts - .prime_network - .remove_compute_node(provider_address, node_address) - .await?; - Console::success(&format!("Remove node tx: {remove_node_tx:?}")); - Ok(true) - } } diff --git a/crates/worker/src/operations/heartbeat/service.rs b/crates/worker/src/operations/heartbeat/service.rs index 0d77d783..1b002cae 100644 --- a/crates/worker/src/operations/heartbeat/service.rs +++ b/crates/worker/src/operations/heartbeat/service.rs @@ -143,7 +143,7 @@ async fn send_heartbeat( wallet: Wallet, docker_service: Arc, metrics_store: Arc, - p2p_id: Option, + p2p_id: p2p::PeerId, ) -> Result { if endpoint.is_none() { return Err(HeartbeatError::RequestFailed); @@ -176,7 +176,7 @@ async fn send_heartbeat( .to_string(), ), timestamp: Some(ts), - p2p_id, + p2p_id: Some(p2p_id.to_string()), // TODO: this should always be `Some` task_details, } } else { @@ -188,7 +188,7 @@ async fn send_heartbeat( .to_string(), ), timestamp: Some(ts), - p2p_id, + p2p_id: Some(p2p_id.to_string()), // TODO: this should always be `Some` ..Default::default() } }; diff --git a/crates/worker/src/p2p/mod.rs b/crates/worker/src/p2p/mod.rs index 32894683..6a851c01 100644 --- a/crates/worker/src/p2p/mod.rs +++ b/crates/worker/src/p2p/mod.rs @@ -31,6 +31,7 @@ pub(crate) struct Service { impl Service { #[allow(clippy::too_many_arguments)] pub(crate) fn new( + keypair: p2p::Keypair, port: u16, wallet: Wallet, validator_addresses: HashSet, @@ -42,7 +43,8 @@ impl Service { cancellation_token: CancellationToken, ) -> Result { let (node, incoming_messages, outgoing_messages) = - build_p2p_node(port, cancellation_token.clone()).context("failed to build p2p node")?; + build_p2p_node(keypair, port, cancellation_token.clone()) + .context("failed to build p2p 
node")?; Ok(Self { node, incoming_messages, @@ -94,10 +96,12 @@ impl Service { } fn build_p2p_node( + keypair: p2p::Keypair, port: u16, cancellation_token: CancellationToken, ) -> Result<(Node, Receiver, Sender)> { NodeBuilder::new() + .with_keypair(keypair) .with_port(port) .with_validator_authentication() .with_hardware_challenge() @@ -393,14 +397,12 @@ async fn handle_invite_request( anyhow::bail!("heartbeat is currently running and in a compute pool"); } - if let Some(pool_id) = context.system_state.compute_pool_id { - if req.pool_id != pool_id { - anyhow::bail!( - "pool ID mismatch: expected {}, got {}", - pool_id, - req.pool_id - ); - } + if req.pool_id != context.system_state.get_compute_pool_id() { + anyhow::bail!( + "pool ID mismatch: expected {}, got {}", + context.system_state.get_compute_pool_id(), + req.pool_id + ); } let invite_bytes = hex::decode(&req.invite).context("failed to decode invite hex")?; diff --git a/crates/worker/src/p2p/service.rs b/crates/worker/src/p2p/service.rs deleted file mode 100644 index 51a68405..00000000 --- a/crates/worker/src/p2p/service.rs +++ /dev/null @@ -1,736 +0,0 @@ -use crate::console::Console; -use crate::docker::DockerService; -use crate::operations::heartbeat::service::HeartbeatService; -use crate::state::system_state::SystemState; -use alloy::primitives::{Address, FixedBytes, U256}; -use anyhow::Result; -use dashmap::DashMap; -use iroh::endpoint::Incoming; -use iroh::{Endpoint, RelayMode, SecretKey}; -use lazy_static::lazy_static; -use log::{debug, error, info, warn}; -use rand_v8::Rng; -use shared::models::challenge::calc_matrix; -use shared::models::invite::InviteRequest; -use shared::p2p::messages::MAX_MESSAGE_SIZE; -use shared::p2p::messages::{P2PMessage, P2PRequest, P2PResponse}; -use shared::p2p::protocol::PRIME_P2P_PROTOCOL; -use shared::security::request_signer::sign_message; -use shared::web3::contracts::core::builder::Contracts; -use shared::web3::contracts::helpers::utils::retry_call; -use 
shared::web3::contracts::structs::compute_pool::PoolStatus; -use shared::web3::wallet::{Wallet, WalletProvider}; -use std::str::FromStr; -use std::sync::Arc; -use std::time::{Duration, SystemTime}; -use tokio_util::sync::CancellationToken; - -lazy_static! { - static ref NONCE_CACHE: DashMap = DashMap::new(); -} - -#[derive(Clone)] -pub(crate) struct P2PContext { - pub docker_service: Arc, - pub heartbeat_service: Arc, - pub system_state: Arc, - pub contracts: Contracts, - pub node_wallet: Wallet, - pub provider_wallet: Wallet, -} - -#[derive(Clone)] -pub(crate) struct P2PService { - endpoint: Endpoint, - secret_key: SecretKey, - node_id: String, - listening_addrs: Vec, - cancellation_token: CancellationToken, - context: Option, - allowed_addresses: Vec
, - wallet: Wallet, -} - -enum EndpointLoopResult { - Shutdown, - EndpointClosed, -} - -impl P2PService { - /// Create a new P2P service with a unique worker identity - pub(crate) async fn new( - worker_p2p_seed: Option, - cancellation_token: CancellationToken, - context: Option, - wallet: Wallet, - allowed_addresses: Vec
, - ) -> Result { - // Generate or derive the secret key for this worker - let secret_key = if let Some(seed) = worker_p2p_seed { - // Derive from seed for deterministic identity - let mut seed_bytes = [0u8; 32]; - seed_bytes[..8].copy_from_slice(&seed.to_le_bytes()); - SecretKey::from_bytes(&seed_bytes) - } else { - let mut rng = rand_v8::thread_rng(); - SecretKey::generate(&mut rng) - }; - - let node_id = secret_key.public().to_string(); - info!("Starting P2P service with node ID: {node_id}"); - - // Create the endpoint - let endpoint = Endpoint::builder() - .secret_key(secret_key.clone()) - .alpns(vec![PRIME_P2P_PROTOCOL.to_vec()]) - .discovery_n0() - .relay_mode(RelayMode::Default) - .bind() - .await?; - - // Get listening addresses - let node_addr = endpoint.node_addr().await?; - let listening_addrs = node_addr - .direct_addresses - .iter() - .map(|addr| addr.to_string()) - .collect::>(); - - info!("P2P service listening on: {listening_addrs:?}"); - - Ok(Self { - endpoint, - secret_key, - node_id, - listening_addrs, - cancellation_token, - context, - allowed_addresses, - wallet, - }) - } - - /// Get the P2P node ID - pub(crate) fn node_id(&self) -> &str { - &self.node_id - } - - /// Get the listening addresses - pub(crate) fn listening_addresses(&self) -> &[String] { - &self.listening_addrs - } - - /// Recreate the endpoint with the same identity - async fn recreate_endpoint(&self) -> Result { - info!("Recreating P2P endpoint with node ID: {}", self.node_id); - - let endpoint = Endpoint::builder() - .secret_key(self.secret_key.clone()) - .alpns(vec![PRIME_P2P_PROTOCOL.to_vec()]) - .discovery_n0() - .relay_mode(RelayMode::Default) - .bind() - .await?; - - let node_addr = endpoint.node_addr().await?; - let listening_addrs = node_addr - .direct_addresses - .iter() - .map(|addr| addr.to_string()) - .collect::>(); - - info!("P2P endpoint recreated, listening on: {listening_addrs:?}"); - Ok(endpoint) - } - /// Start accepting incoming connections with automatic 
recovery - pub(crate) fn start(&self) -> Result<()> { - let service = Arc::new(self.clone()); - let cancellation_token = self.cancellation_token.clone(); - - tokio::spawn(async move { - service.run_with_recovery(cancellation_token).await; - }); - - Ok(()) - } - - /// Run the P2P service with automatic endpoint recovery - async fn run_with_recovery(&self, cancellation_token: CancellationToken) { - let mut endpoint = self.endpoint.clone(); - let mut retry_delay = Duration::from_secs(1); - const MAX_RETRY_DELAY: Duration = Duration::from_secs(60); - - loop { - tokio::select! { - _ = cancellation_token.cancelled() => { - info!("P2P service shutting down"); - break; - } - result = self.run_endpoint_loop(&endpoint, &cancellation_token) => { - match result { - EndpointLoopResult::Shutdown => break, - EndpointLoopResult::EndpointClosed => { - warn!("P2P endpoint closed, attempting recovery in {retry_delay:?}"); - - tokio::select! { - _ = cancellation_token.cancelled() => break, - _ = tokio::time::sleep(retry_delay) => {} - } - - match self.recreate_endpoint().await { - Ok(new_endpoint) => { - info!("P2P endpoint successfully recovered"); - endpoint = new_endpoint; - retry_delay = Duration::from_secs(1); - } - Err(e) => { - error!("Failed to recreate P2P endpoint: {e}"); - retry_delay = std::cmp::min(retry_delay * 2, MAX_RETRY_DELAY); - } - } - } - } - } - } - } - } - - /// Run the main endpoint acceptance loop - async fn run_endpoint_loop( - &self, - endpoint: &Endpoint, - cancellation_token: &CancellationToken, - ) -> EndpointLoopResult { - let context = self.context.clone(); - let allowed_addresses = self.allowed_addresses.clone(); - let wallet = self.wallet.clone(); - - loop { - tokio::select! 
{ - _ = cancellation_token.cancelled() => { - return EndpointLoopResult::Shutdown; - } - incoming = endpoint.accept() => { - if let Some(incoming) = incoming { - tokio::spawn(Self::handle_connection(incoming, context.clone(), allowed_addresses.clone(), wallet.clone())); - } else { - return EndpointLoopResult::EndpointClosed; - } - } - } - } - } - - /// Handle an incoming connection - async fn handle_connection( - incoming: Incoming, - context: Option, - allowed_addresses: Vec
, - wallet: Wallet, - ) { - match incoming.await { - Ok(connection) => { - match connection.accept_bi().await { - Ok((send, recv)) => { - if let Err(e) = - Self::handle_stream(send, recv, context, allowed_addresses, wallet) - .await - { - error!("Error handling stream: {e}"); - } - // Wait a bit before closing to ensure client has processed response - tokio::time::sleep(tokio::time::Duration::from_millis(100)).await; - } - Err(e) => { - error!("Failed to accept bi-stream: {e}"); - connection.close(1u32.into(), b"stream error"); - } - } - } - Err(e) => { - // Only log as debug for protocol mismatches, which are expected - if e.to_string() - .contains("peer doesn't support any known protocol") - { - debug!("Connection attempt with unsupported protocol: {e}"); - } else { - error!("Failed to accept connection: {e}"); - } - } - } - } - - /// Read a message from the stream - async fn read_message(recv: &mut iroh::endpoint::RecvStream) -> Result { - // Read message length - let mut msg_len_bytes = [0u8; 4]; - match recv.read_exact(&mut msg_len_bytes).await { - Ok(_) => {} - Err(e) => { - debug!("Stream read ended: {e}"); - return Err(anyhow::anyhow!("Stream closed")); - } - } - let msg_len = u32::from_be_bytes(msg_len_bytes) as usize; - - // Enforce maximum message size - if msg_len > MAX_MESSAGE_SIZE { - error!("Message size {msg_len} exceeds maximum allowed size {MAX_MESSAGE_SIZE}"); - return Err(anyhow::anyhow!("Message too large")); - } - - let mut msg_bytes = vec![0u8; msg_len]; - recv.read_exact(&mut msg_bytes).await?; - - let request: P2PRequest = serde_json::from_slice(&msg_bytes) - .map_err(|e| anyhow::anyhow!("Failed to deserialize P2P request: {}", e))?; - - debug!("Received P2P request: {request:?}"); - Ok(request) - } - - async fn write_response( - send: &mut iroh::endpoint::SendStream, - response: P2PResponse, - ) -> Result<()> { - let response_bytes = serde_json::to_vec(&response)?; - - // Check response size before sending - if response_bytes.len() > 
MAX_MESSAGE_SIZE { - error!( - "Response size {} exceeds maximum allowed size {}", - response_bytes.len(), - MAX_MESSAGE_SIZE - ); - return Err(anyhow::anyhow!("Response too large")); - } - - send.write_all(&(response_bytes.len() as u32).to_be_bytes()) - .await?; - send.write_all(&response_bytes).await?; - Ok(()) - } - - /// Handle a bidirectional stream - async fn handle_stream( - mut send: iroh::endpoint::SendStream, - mut recv: iroh::endpoint::RecvStream, - context: Option, - allowed_addresses: Vec
, - wallet: Wallet, - ) -> Result<()> { - // Handle multiple messages in sequence - let mut is_authorized = false; - let mut current_challenge: Option<String> = None; - - loop { - let Ok(request) = Self::read_message(&mut recv).await else { - break; - }; - - // Handle the request - let response = match request.message { - P2PMessage::Ping { nonce, .. } => { - info!("Received ping with nonce: {nonce}"); - P2PResponse::new( - request.id, - P2PMessage::Pong { - timestamp: SystemTime::now(), - nonce, - }, - ) - } - P2PMessage::RequestAuthChallenge { message } => { - // Generate a fresh cryptographically secure challenge message for this auth attempt - let challenge_bytes: [u8; 32] = rand_v8::rngs::OsRng.gen(); - let challenge_message = hex::encode(challenge_bytes); - - debug!("Received request auth challenge"); - let signature = match sign_message(&message, &wallet).await { - Ok(signature) => signature, - Err(e) => { - error!("Failed to sign message: {e}"); - return Err(anyhow::anyhow!("Failed to sign message: {}", e)); - } - }; - - // Store the challenge message in nonce cache to prevent replay - NONCE_CACHE.insert(challenge_message.clone(), SystemTime::now()); - - // Store the current challenge for this connection - current_challenge = Some(challenge_message.clone()); - - P2PResponse::new( - request.id, - P2PMessage::AuthChallenge { - message: challenge_message, - signed_message: signature, - }, - ) - } - P2PMessage::AuthSolution { signed_message } => { - // Get the challenge message for this connection - debug!("Received auth solution"); - let Some(challenge_message) = &current_challenge else { - warn!("No active challenge for auth solution"); - let response = P2PResponse::new(request.id, P2PMessage::AuthRejected {}); - Self::write_response(&mut send, response).await?; - continue; - }; - - // Check if challenge message has been used before (replay attack prevention) - if !NONCE_CACHE.contains_key(challenge_message) { - warn!("Challenge message not found or expired: 
{challenge_message}"); - let response = P2PResponse::new(request.id, P2PMessage::AuthRejected {}); - Self::write_response(&mut send, response).await?; - continue; - } - - // Clean up old nonces (older than 5 minutes) - let cutoff_time = SystemTime::now() - Duration::from_secs(300); - NONCE_CACHE.retain(|_, &mut timestamp| timestamp > cutoff_time); - - // Parse the signature - let Ok(parsed_signature) = - alloy::primitives::Signature::from_str(&signed_message) - else { - // Handle signature parsing error - let response = P2PResponse::new(request.id, P2PMessage::AuthRejected {}); - Self::write_response(&mut send, response).await?; - continue; - }; - - // Recover address from the challenge message that the client signed - let Ok(recovered_address) = - parsed_signature.recover_address_from_msg(challenge_message) - else { - // Handle address recovery error - let response = P2PResponse::new(request.id, P2PMessage::AuthRejected {}); - Self::write_response(&mut send, response).await?; - continue; - }; - - // Check if the recovered address is in allowed addresses - NONCE_CACHE.remove(challenge_message); - current_challenge = None; - if allowed_addresses.contains(&recovered_address) { - is_authorized = true; - P2PResponse::new(request.id, P2PMessage::AuthGranted {}) - } else { - P2PResponse::new(request.id, P2PMessage::AuthRejected {}) - } - } - P2PMessage::HardwareChallenge { challenge, .. 
} if is_authorized => { - info!("Received hardware challenge"); - let challenge_response = calc_matrix(&challenge); - P2PResponse::new( - request.id, - P2PMessage::HardwareChallengeResponse { - response: challenge_response, - timestamp: SystemTime::now(), - }, - ) - } - P2PMessage::Invite(invite) if is_authorized => { - if let Some(context) = &context { - let (status, error) = Self::handle_invite(invite, context).await; - P2PResponse::new(request.id, P2PMessage::InviteResponse { status, error }) - } else { - P2PResponse::new( - request.id, - P2PMessage::InviteResponse { - status: "error".to_string(), - error: Some("No context".to_string()), - }, - ) - } - } - P2PMessage::GetTaskLogs if is_authorized => { - if let Some(context) = &context { - let logs = context.docker_service.get_logs().await; - let response_logs = logs - .map(|log_string| vec![log_string]) - .map_err(|e| e.to_string()); - P2PResponse::new( - request.id, - P2PMessage::GetTaskLogsResponse { - logs: response_logs, - }, - ) - } else { - P2PResponse::new( - request.id, - P2PMessage::GetTaskLogsResponse { logs: Ok(vec![]) }, - ) - } - } - P2PMessage::RestartTask if is_authorized => { - if let Some(context) = &context { - let result = context.docker_service.restart_task().await; - let response_result = result.map_err(|e| e.to_string()); - P2PResponse::new( - request.id, - P2PMessage::RestartTaskResponse { - result: response_result, - }, - ) - } else { - P2PResponse::new( - request.id, - P2PMessage::RestartTaskResponse { result: Ok(()) }, - ) - } - } - _ => { - warn!("Unexpected message type"); - continue; - } - }; - - // Send response - Self::write_response(&mut send, response).await?; - } - - Ok(()) - } - - async fn handle_invite( - invite: InviteRequest, - context: &P2PContext, - ) -> (String, Option) { - if context.system_state.is_running().await { - return ( - "error".to_string(), - Some("Heartbeat is currently running and in a compute pool".to_string()), - ); - } - if let Some(pool_id) = 
context.system_state.compute_pool_id.clone() { - if invite.pool_id.to_string() != pool_id { - return ("error".to_string(), Some("Invalid pool ID".to_string())); - } - } - - let invite_bytes = match hex::decode(&invite.invite) { - Ok(bytes) => bytes, - Err(err) => { - error!("Failed to decode invite hex string: {err:?}"); - return ( - "error".to_string(), - Some("Invalid invite format".to_string()), - ); - } - }; - - if invite_bytes.len() < 65 { - return ( - "error".to_string(), - Some("Invite data is too short".to_string()), - ); - } - - let contracts = &context.contracts; - let wallet = &context.node_wallet; - let pool_id = U256::from(invite.pool_id); - - let bytes_array: [u8; 65] = match invite_bytes[..65].try_into() { - Ok(array) => array, - Err(_) => { - error!("Failed to convert invite bytes to fixed-size array"); - return ( - "error".to_string(), - Some("Invalid invite signature format".to_string()), - ); - } - }; - - let provider_address = context.provider_wallet.wallet.default_signer().address(); - - let pool_info = match contracts.compute_pool.get_pool_info(pool_id).await { - Ok(info) => info, - Err(err) => { - error!("Failed to get pool info: {err:?}"); - return ( - "error".to_string(), - Some("Failed to get pool information".to_string()), - ); - } - }; - - if let PoolStatus::PENDING = pool_info.status { - Console::user_error("Pool is pending - Invite is invalid"); - return ( - "error".to_string(), - Some("Pool is pending - Invite is invalid".to_string()), - ); - } - - let node_address = vec![wallet.wallet.default_signer().address()]; - let signatures = vec![FixedBytes::from(&bytes_array)]; - let nonces = vec![invite.nonce]; - let expirations = vec![invite.expiration]; - let call = match contracts.compute_pool.build_join_compute_pool_call( - pool_id, - provider_address, - node_address, - nonces, - expirations, - signatures, - ) { - Ok(call) => call, - Err(err) => { - error!("Failed to build join compute pool call: {err:?}"); - return ( - 
"error".to_string(), - Some("Failed to build join compute pool call".to_string()), - ); - } - }; - let provider = &context.provider_wallet.provider; - match retry_call(call, 3, provider.clone(), None).await { - Ok(result) => { - Console::section("WORKER JOINED COMPUTE POOL"); - Console::success(&format!( - "Successfully registered on chain with tx: {result}" - )); - Console::info( - "Status", - "Worker is now part of the compute pool and ready to receive tasks", - ); - } - Err(err) => { - error!("Failed to join compute pool: {err:?}"); - return ( - "error".to_string(), - Some(format!("Failed to join compute pool: {err}")), - ); - } - } - let endpoint = if let Some(url) = &invite.master_url { - format!("{url}/heartbeat") - } else { - match (&invite.master_ip, &invite.master_port) { - (Some(ip), Some(port)) => format!("http://{ip}:{port}/heartbeat"), - _ => { - error!("Missing master IP or port in invite request"); - return ( - "error".to_string(), - Some("Missing master IP or port".to_string()), - ); - } - } - }; - - if let Err(err) = context.heartbeat_service.start(endpoint).await { - error!("Failed to start heartbeat service: {err:?}"); - return ( - "error".to_string(), - Some("Failed to start heartbeat service".to_string()), - ); - } - - ("ok".to_string(), None) - } -} - -#[cfg(test)] -mod tests { - use rand_v8::Rng; - use serial_test::serial; - use shared::p2p::P2PClient; - use url::Url; - - use super::*; - - async fn setup_test_service( - include_addresses: bool, - ) -> (P2PService, P2PClient, Address, Address) { - let validator_wallet = shared::web3::wallet::Wallet::new( - "0000000000000000000000000000000000000000000000000000000000000001", - Url::parse("https://mainnet.infura.io/v3/9aa3d95b3bc440fa88ea12eaa4456161").unwrap(), - ) - .unwrap(); - let worker_wallet = shared::web3::wallet::Wallet::new( - "0000000000000000000000000000000000000000000000000000000000000002", - Url::parse("https://mainnet.infura.io/v3/9aa3d95b3bc440fa88ea12eaa4456161").unwrap(), - ) - 
.unwrap(); - let validator_wallet_address = validator_wallet.wallet.default_signer().address(); - let worker_wallet_address = worker_wallet.wallet.default_signer().address(); - let service = P2PService::new( - None, - CancellationToken::new(), - None, - worker_wallet, - if include_addresses { - vec![validator_wallet_address] - } else { - vec![] - }, - ) - .await - .unwrap(); - let client = P2PClient::new(validator_wallet.clone()).await.unwrap(); - ( - service, - client, - validator_wallet_address, - worker_wallet_address, - ) - } - - #[tokio::test] - #[serial] - async fn test_ping() { - let (service, client, _, worker_wallet_address) = setup_test_service(true).await; - let node_id = service.node_id().to_string(); - let addresses = service.listening_addresses().to_vec(); - let random_nonce = rand_v8::thread_rng().gen::(); - - tokio::spawn(async move { - service.start().unwrap(); - }); - - let ping = P2PMessage::Ping { - nonce: random_nonce, - timestamp: SystemTime::now(), - }; - - let response = client - .send_request(&node_id, &addresses, worker_wallet_address, ping, 20) - .await - .unwrap(); - - let response_nonce = match response { - P2PMessage::Pong { nonce, .. 
} => nonce, - _ => panic!("Expected Pong message"), - }; - assert_eq!(response_nonce, random_nonce); - } - #[tokio::test] - #[serial] - async fn test_auth_error() { - let (service, client, _, worker_wallet_address) = setup_test_service(false).await; - let node_id = service.node_id().to_string(); - let addresses = service.listening_addresses().to_vec(); - - tokio::spawn(async move { - service.start().unwrap(); - }); - - let ping = P2PMessage::Ping { - nonce: rand_v8::thread_rng().gen::(), - timestamp: SystemTime::now(), - }; - - // Since we set include_addresses to false, the client's wallet address - // is not in the allowed_addresses list, so we expect auth to be rejected - let result = client - .send_request(&node_id, &addresses, worker_wallet_address, ping, 20) - .await; - - assert!( - result.is_err(), - "Expected auth to be rejected but request succeeded" - ); - } -} diff --git a/crates/worker/src/state/system_state.rs b/crates/worker/src/state/system_state.rs index e419c870..e6ab26da 100644 --- a/crates/worker/src/state/system_state.rs +++ b/crates/worker/src/state/system_state.rs @@ -2,7 +2,6 @@ use anyhow::Result; use directories::ProjectDirs; use log::debug; use log::error; -use log::warn; use serde::{Deserialize, Serialize}; use std::fs; use std::path::Path; @@ -10,9 +9,6 @@ use std::path::PathBuf; use std::sync::Arc; use tokio::sync::RwLock; -use crate::utils::p2p::generate_iroh_node_id_from_seed; -use crate::utils::p2p::generate_random_seed; - const STATE_FILENAME: &str = "heartbeat_state.toml"; fn get_default_state_dir() -> Option { @@ -20,11 +16,31 @@ fn get_default_state_dir() -> Option { .map(|proj_dirs| proj_dirs.data_local_dir().to_string_lossy().into_owned()) } -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone)] struct PersistedSystemState { endpoint: Option, - p2p_seed: Option, - worker_p2p_seed: Option, + p2p_keypair: p2p::Keypair, +} + +impl Serialize for PersistedSystemState { + fn serialize(&self, serializer: S) -> 
Result + where + S: serde::Serializer, + { + serde_json::to_string(self) + .map_err(serde::ser::Error::custom) + .and_then(|s| serializer.serialize_str(&s)) + } +} + +impl<'de> Deserialize<'de> for PersistedSystemState { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + let s: String = Deserialize::deserialize(deserializer)?; + serde_json::from_str(&s).map_err(serde::de::Error::custom) + } } #[derive(Debug, Clone)] @@ -34,18 +50,15 @@ pub(crate) struct SystemState { endpoint: Arc>>, state_dir_overwrite: Option, disable_state_storing: bool, - pub compute_pool_id: Option, - - pub worker_p2p_seed: Option, - pub p2p_id: Option, - pub p2p_seed: Option, + compute_pool_id: u32, + p2p_keypair: p2p::Keypair, } impl SystemState { pub(crate) fn new( state_dir: Option, disable_state_storing: bool, - compute_pool_id: Option, + compute_pool_id: u32, ) -> Self { let default_state_dir = get_default_state_dir(); debug!("Default state dir: {default_state_dir:?}"); @@ -53,9 +66,10 @@ impl SystemState { .map(PathBuf::from) .or_else(|| default_state_dir.map(PathBuf::from)); debug!("State path: {state_path:?}"); + let mut endpoint = None; - let mut p2p_seed: Option = None; - let mut worker_p2p_seed: Option = None; + let mut p2p_keypair = None; + // Try to load state, log info if creating new file if !disable_state_storing { if let Some(path) = &state_path { @@ -67,31 +81,15 @@ impl SystemState { } else if let Ok(Some(loaded_state)) = SystemState::load_state(path) { debug!("Loaded previous state from {state_file:?}"); endpoint = loaded_state.endpoint; - p2p_seed = loaded_state.p2p_seed; - worker_p2p_seed = loaded_state.worker_p2p_seed; + p2p_keypair = Some(loaded_state.p2p_keypair); } else { debug!("Failed to load state from {state_file:?}"); } } } - if p2p_seed.is_none() { - let seed = generate_random_seed(); - p2p_seed = Some(seed); - } - // Generate p2p_id from seed if available - - let p2p_id: Option = - p2p_seed.and_then(|seed| match 
generate_iroh_node_id_from_seed(seed) { - Ok(id) => Some(id), - Err(_) => { - warn!("Failed to generate p2p_id from seed"); - None - } - }); - if worker_p2p_seed.is_none() { - let seed = generate_random_seed(); - worker_p2p_seed = Some(seed); + if p2p_keypair.is_none() { + p2p_keypair = Some(p2p::Keypair::generate_ed25519()); } Self { @@ -101,44 +99,34 @@ impl SystemState { state_dir_overwrite: state_path.clone(), disable_state_storing, compute_pool_id, - p2p_seed, - p2p_id, - worker_p2p_seed, + p2p_keypair: p2p_keypair.expect("p2p keypair must be Some at this point"), } } + fn save_state(&self, heartbeat_endpoint: Option) -> Result<()> { if !self.disable_state_storing { debug!("Saving state"); if let Some(state_dir) = &self.state_dir_overwrite { - // Get values without block_on - debug!("Saving p2p_seed: {:?}", self.p2p_seed); - - // Ensure p2p_seed is valid before creating state - if let Some(seed) = self.p2p_seed { - let state = PersistedSystemState { - endpoint: heartbeat_endpoint, - p2p_seed: Some(seed), - worker_p2p_seed: self.worker_p2p_seed, - }; - - debug!("state: {state:?}"); - - fs::create_dir_all(state_dir)?; - let state_path = state_dir.join(STATE_FILENAME); - - // Use JSON serialization instead of TOML - match serde_json::to_string_pretty(&state) { - Ok(json_string) => { - fs::write(&state_path, json_string)?; - debug!("Saved state to {state_path:?}"); - } - Err(e) => { - error!("Failed to serialize state: {e}"); - return Err(anyhow::anyhow!("Failed to serialize state: {}", e)); - } + let state = PersistedSystemState { + endpoint: heartbeat_endpoint, + p2p_keypair: self.p2p_keypair.clone(), + }; + + debug!("state: {state:?}"); + + fs::create_dir_all(state_dir)?; + let state_path = state_dir.join(STATE_FILENAME); + + // Use JSON serialization instead of TOML + match serde_json::to_string_pretty(&state) { + Ok(json_string) => { + fs::write(&state_path, json_string)?; + debug!("Saved state to {state_path:?}"); + } + Err(e) => { + error!("Failed to 
serialize state: {e}"); + return Err(anyhow::anyhow!("Failed to serialize state: {}", e)); } - } else { - warn!("Cannot save state: p2p_seed is None"); } } } @@ -160,12 +148,16 @@ impl SystemState { Ok(None) } - pub(crate) fn get_p2p_seed(&self) -> Option { - self.p2p_seed + pub(crate) fn get_compute_pool_id(&self) -> u32 { + self.compute_pool_id + } + + pub(crate) fn get_p2p_keypair(&self) -> &p2p::Keypair { + &self.p2p_keypair } - pub(crate) fn get_p2p_id(&self) -> Option { - self.p2p_id.clone() + pub(crate) fn get_p2p_id(&self) -> p2p::PeerId { + self.p2p_keypair.public().to_peer_id() } pub(crate) async fn update_last_heartbeat(&self) { @@ -238,9 +230,8 @@ mod tests { let state = SystemState::new( Some(temp_dir.path().to_string_lossy().to_string()), false, - None, + 0, ); - assert!(state.p2p_id.is_some()); let _ = state .set_running(true, Some("http://localhost:8080/heartbeat".to_string())) .await; @@ -266,7 +257,7 @@ mod tests { let state = SystemState::new( Some(temp_dir.path().to_string_lossy().to_string()), false, - None, + 0, ); assert!(!(state.is_running().await)); assert_eq!(state.get_heartbeat_endpoint().await, None); @@ -285,7 +276,7 @@ mod tests { let state = SystemState::new( Some(temp_dir.path().to_string_lossy().to_string()), false, - None, + 0, ); assert_eq!( state.get_heartbeat_endpoint().await, diff --git a/crates/worker/src/utils/mod.rs b/crates/worker/src/utils/mod.rs index 210f1e35..6a79dd07 100644 --- a/crates/worker/src/utils/mod.rs +++ b/crates/worker/src/utils/mod.rs @@ -1,2 +1 @@ pub(crate) mod logging; -pub(crate) mod p2p; diff --git a/crates/worker/src/utils/p2p.rs b/crates/worker/src/utils/p2p.rs deleted file mode 100644 index ef07b28c..00000000 --- a/crates/worker/src/utils/p2p.rs +++ /dev/null @@ -1,60 +0,0 @@ -use iroh::SecretKey; -use rand_v8::Rng; -use rand_v8::{rngs::StdRng, SeedableRng}; -use std::error::Error; - -/// Generate a random seed -pub(crate) fn generate_random_seed() -> u64 { - rand_v8::thread_rng().gen() -} - -// 
Generate an Iroh node ID from a seed -pub(crate) fn generate_iroh_node_id_from_seed(seed: u64) -> Result> { - // Create a deterministic RNG from the seed - let mut rng = StdRng::seed_from_u64(seed); - - // Generate the secret key using Iroh's method - // This matches exactly how it's done in your Node implementation - let secret_key = SecretKey::generate(&mut rng); - - // Get the node ID (public key) as a string - let node_id = secret_key.public().to_string(); - - Ok(node_id) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_generate_random_seed() { - let seed1 = generate_random_seed(); - let seed2 = generate_random_seed(); - - assert_ne!(seed1, seed2); - } - - #[test] - fn test_known_generation() { - let seed: u32 = 848364385; - let result = generate_iroh_node_id_from_seed(seed as u64).unwrap(); - assert_eq!( - result, - "6ba970180efbd83909282ac741085431f54aa516e1783852978bd529a400d0e9" - ); - assert_eq!(result.len(), 64); - } - - #[test] - fn test_deterministic_generation() { - // Same seed should generate same node_id - let seed = generate_random_seed(); - println!("seed: {}", seed); - let result1 = generate_iroh_node_id_from_seed(seed).unwrap(); - let result2 = generate_iroh_node_id_from_seed(seed).unwrap(); - println!("result1: {}", result1); - - assert_eq!(result1, result2); - } -} From 304f8a849477e47f6f8be35e40529c6fdbf30080 Mon Sep 17 00:00:00 2001 From: elizabeth Date: Wed, 9 Jul 2025 13:45:04 -0400 Subject: [PATCH 13/38] organize and remove unused deps --- Cargo.lock | 134 +-------------------------------------- crates/worker/Cargo.toml | 52 ++++++--------- 2 files changed, 24 insertions(+), 162 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9964cd8d..807c07a6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2292,16 +2292,6 @@ dependencies = [ "cipher", ] -[[package]] -name = "ctrlc" -version = "3.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"697b5419f348fd5ae2478e8018cb016c00a5881c7f46c717de98ffd135a5651c" -dependencies = [ - "nix 0.29.0", - "windows-sys 0.59.0", -] - [[package]] name = "curve25519-dalek" version = "4.1.3" @@ -3578,7 +3568,7 @@ dependencies = [ "futures-channel", "futures-io", "futures-util", - "idna 1.0.3", + "idna", "ipnet", "once_cell", "rand 0.8.5", @@ -3603,7 +3593,7 @@ dependencies = [ "futures-channel", "futures-io", "futures-util", - "idna 1.0.3", + "idna", "ipnet", "once_cell", "rand 0.9.1", @@ -4077,16 +4067,6 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" -[[package]] -name = "idna" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d20d6b07bfbc108882d88ed8e37d39636dcc260e15e30c45e6ba089610b917c" -dependencies = [ - "unicode-bidi", - "unicode-normalization", -] - [[package]] name = "idna" version = "1.0.3" @@ -4141,12 +4121,6 @@ dependencies = [ "windows 0.52.0", ] -[[package]] -name = "if_chain" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb56e1aa765b4b4f3aadfab769793b7087bb03a4ea4920644a6d238e2df5b9ed" - [[package]] name = "igd-next" version = "0.14.3" @@ -4241,19 +4215,6 @@ dependencies = [ "serde", ] -[[package]] -name = "indicatif" -version = "0.17.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "183b3088984b400f4cfac3620d5e076c84da5364016b4f49473de574b2586235" -dependencies = [ - "console", - "number_prefix", - "portable-atomic", - "unicode-width", - "web-time", -] - [[package]] name = "inout" version = "0.1.4" @@ -6077,12 +6038,6 @@ dependencies = [ "syn 2.0.101", ] -[[package]] -name = "number_prefix" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" - [[package]] name = "nvml-wrapper" version = "0.10.0" 
@@ -6755,30 +6710,6 @@ dependencies = [ "toml_edit", ] -[[package]] -name = "proc-macro-error" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" -dependencies = [ - "proc-macro-error-attr", - "proc-macro2", - "quote", - "syn 1.0.109", - "version_check", -] - -[[package]] -name = "proc-macro-error-attr" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" -dependencies = [ - "proc-macro2", - "quote", - "version_check", -] - [[package]] name = "proc-macro-error-attr2" version = "2.0.0" @@ -9239,12 +9170,6 @@ version = "2.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "75b844d17643ee918803943289730bec8aac480150456169e647ed0b576ba539" -[[package]] -name = "unicode-bidi" -version = "0.3.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c1cb5db39152898a79168971543b1cb5020dff7fe43c8dc468b0885f5e29df5" - [[package]] name = "unicode-ident" version = "1.0.18" @@ -9338,7 +9263,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32f8b686cadd1473f4bd0117a5d28d36b1ade384ea9b5069a1c40aefed7fda60" dependencies = [ "form_urlencoded", - "idna 1.0.3", + "idna", "percent-encoding", "serde", ] @@ -9469,48 +9394,6 @@ dependencies = [ "url", ] -[[package]] -name = "validator" -version = "0.16.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b92f40481c04ff1f4f61f304d61793c7b56ff76ac1469f1beb199b1445b253bd" -dependencies = [ - "idna 0.4.0", - "lazy_static", - "regex", - "serde", - "serde_derive", - "serde_json", - "url", - "validator_derive", -] - -[[package]] -name = "validator_derive" -version = "0.16.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"bc44ca3088bb3ba384d9aecf40c6a23a676ce23e09bdaca2073d99c207f864af" -dependencies = [ - "if_chain", - "lazy_static", - "proc-macro-error", - "proc-macro2", - "quote", - "regex", - "syn 1.0.109", - "validator_types", -] - -[[package]] -name = "validator_types" -version = "0.16.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "111abfe30072511849c5910134e8baf8dc05de4c0e5903d681cbd5c9c4d611e3" -dependencies = [ - "proc-macro2", - "syn 1.0.109", -] - [[package]] name = "valuable" version = "0.1.1" @@ -10324,32 +10207,24 @@ dependencies = [ "alloy", "anyhow", "bollard", - "bytes", "chrono", "cid", "clap", "colored", "console", - "ctrlc", - "dashmap", "directories", "env_logger", "futures", - "futures-core", "futures-util", "hex", "homedir", - "indicatif", "lazy_static", "libc", "log", - "nalgebra", "nvml-wrapper", "p2p", "rand 0.8.5", "rand 0.9.1", - "rand_core 0.6.4", - "regex", "reqwest", "rust-ipfs", "serde", @@ -10366,15 +10241,12 @@ dependencies = [ "tokio", "tokio-stream", "tokio-util", - "toml", "tracing", - "tracing-log", "tracing-loki", "tracing-subscriber", "unicode-width", "url", "uuid", - "validator 0.16.1", ] [[package]] diff --git a/crates/worker/Cargo.toml b/crates/worker/Cargo.toml index 43fc4a53..f3f01b04 100644 --- a/crates/worker/Cargo.toml +++ b/crates/worker/Cargo.toml @@ -11,42 +11,38 @@ shared = { workspace = true } p2p = { workspace = true } actix-web = { workspace = true } -bollard = "0.18.1" +alloy = { workspace = true } +anyhow = { workspace = true } +cid = { workspace = true } clap = { workspace = true } -colored = "2.0" -lazy_static = "1.4" -regex = "1.10" +chrono = { workspace = true } +directories = { workspace = true } +env_logger = { workspace = true } +futures = { workspace = true } +futures-util = { workspace = true } +hex = { workspace = true } +log = { workspace = true } +rand_v8 = { workspace = true } +reqwest = { workspace = true, features = ["blocking"] } +rust-ipfs = { workspace = true } serde = { 
workspace = true } +serde_json = { workspace = true } +stun = { workspace = true } tokio = { workspace = true, features = ["full", "macros"] } +tokio-util = { workspace = true, features = ["rt"] } +url = { workspace = true } uuid = { workspace = true } -validator = { version = "0.16", features = ["derive"] } + +bollard = "0.18.1" +colored = "2.0" +lazy_static = "1.4" sysinfo = "0.30" libc = "0.2" nvml-wrapper = "0.10.0" -log = { workspace = true } -env_logger = { workspace = true } -futures-core = "0.3" -futures-util = { workspace = true } -alloy = { workspace = true } -url = { workspace = true } -serde_json = { workspace = true } -reqwest = { workspace = true, features = ["blocking"] } -hex = { workspace = true } console = "0.15.10" -indicatif = "0.17.9" -bytes = "1.9.0" -anyhow = { workspace = true } thiserror = "2.0.11" -toml = { workspace = true } -ctrlc = "3.4.5" -tokio-util = { workspace = true, features = ["rt"] } -futures = { workspace = true } -chrono = { workspace = true } serial_test = "0.5.1" -directories = { workspace = true } strip-ansi-escapes = "0.2.1" -nalgebra = { workspace = true } -stun = { workspace = true } sha2 = "0.10.8" unicode-width = "0.2.0" rand = "0.9.0" @@ -54,12 +50,6 @@ tempfile = "3.14.0" tracing-loki = "0.2.6" tracing = "0.1.41" tracing-subscriber = { version = "0.3.19", features = ["env-filter"] } -tracing-log = "0.2.0" time = "0.3.41" -rand_v8 = { workspace = true } -rand_core_v6 = { workspace = true } -dashmap = "6.1.0" tokio-stream = { version = "0.1.17", features = ["net"] } -rust-ipfs = { workspace = true } -cid = { workspace = true } homedir = "0.3" From 46ecca716e97374f12abbda15e9fbf77fc49ae5a Mon Sep 17 00:00:00 2001 From: elizabeth Date: Wed, 9 Jul 2025 13:48:42 -0400 Subject: [PATCH 14/38] add libp2p_port to cli --- crates/worker/src/cli/command.rs | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/crates/worker/src/cli/command.rs b/crates/worker/src/cli/command.rs index b5a56bdd..8f358252 
100644 --- a/crates/worker/src/cli/command.rs +++ b/crates/worker/src/cli/command.rs @@ -14,6 +14,7 @@ use crate::services::discovery_updater::DiscoveryUpdater; use crate::state::system_state::SystemState; use crate::TaskHandles; use alloy::primitives::utils::format_ether; +use alloy::primitives::Address; use alloy::primitives::U256; use alloy::signers::local::PrivateKeySigner; use alloy::signers::Signer; @@ -22,8 +23,10 @@ use log::{error, info}; use shared::models::node::ComputeRequirements; use shared::models::node::Node; use shared::web3::contracts::core::builder::ContractBuilder; +use shared::web3::contracts::core::builder::Contracts; use shared::web3::contracts::structs::compute_pool::PoolStatus; use shared::web3::wallet::Wallet; +use shared::web3::wallet::WalletProvider; use std::str::FromStr; use std::sync::Arc; use std::time::Duration; @@ -54,6 +57,10 @@ pub enum Commands { #[arg(long, default_value = "8080")] port: u16, + /// Port for libp2p service + #[arg(long, default_value = "4002")] + libp2p_port: u16, + /// External IP address for the worker to advertise #[arg(long)] external_ip: Option, @@ -186,6 +193,7 @@ pub async fn execute_command( match command { Commands::Run { port: _, + libp2p_port, external_ip, compute_pool_id, dry_run: _, @@ -716,11 +724,10 @@ pub async fn execute_command( let mut allowed_addresses = vec![pool_info.creator, pool_info.compute_manager_key]; allowed_addresses.extend(validators); - let port = 0; // TODO: cli option let validator_addresses = std::collections::HashSet::from_iter(allowed_addresses); let p2p_service = match crate::p2p::Service::new( state.get_p2p_keypair().clone(), - port, + *libp2p_port, node_wallet_instance.clone(), validator_addresses, docker_service.clone(), @@ -1088,10 +1095,6 @@ pub async fn execute_command( } } -use alloy::primitives::Address; -use shared::web3::contracts::core::builder::Contracts; -use shared::web3::wallet::WalletProvider; - async fn remove_compute_node( contracts: Contracts, 
provider_address: Address, From 4358e3201b83a77a1f78e3eb60d516a60f7d678d Mon Sep 17 00:00:00 2001 From: elizabeth Date: Wed, 9 Jul 2025 15:19:49 -0400 Subject: [PATCH 15/38] serde for PersistedSystemState --- crates/worker/src/state/system_state.rs | 37 +++++++++++++------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/crates/worker/src/state/system_state.rs b/crates/worker/src/state/system_state.rs index e6ab26da..bed32693 100644 --- a/crates/worker/src/state/system_state.rs +++ b/crates/worker/src/state/system_state.rs @@ -16,31 +16,32 @@ fn get_default_state_dir() -> Option { .map(|proj_dirs| proj_dirs.data_local_dir().to_string_lossy().into_owned()) } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Serialize, Deserialize)] struct PersistedSystemState { endpoint: Option, + #[serde( + serialize_with = "serialize_keypair", + deserialize_with = "deserialize_keypair" + )] p2p_keypair: p2p::Keypair, } -impl Serialize for PersistedSystemState { - fn serialize(&self, serializer: S) -> Result - where - S: serde::Serializer, - { - serde_json::to_string(self) - .map_err(serde::ser::Error::custom) - .and_then(|s| serializer.serialize_str(&s)) - } +fn serialize_keypair(keypair: &p2p::Keypair, serializer: S) -> Result +where + S: serde::Serializer, +{ + let serialized = keypair + .to_protobuf_encoding() + .map_err(serde::ser::Error::custom)?; + serializer.serialize_bytes(&serialized) } -impl<'de> Deserialize<'de> for PersistedSystemState { - fn deserialize(deserializer: D) -> Result - where - D: serde::Deserializer<'de>, - { - let s: String = Deserialize::deserialize(deserializer)?; - serde_json::from_str(&s).map_err(serde::de::Error::custom) - } +fn deserialize_keypair<'de, D>(deserializer: D) -> Result +where + D: serde::Deserializer<'de>, +{ + let serialized: Vec = Deserialize::deserialize(deserializer)?; + p2p::Keypair::from_protobuf_encoding(&serialized).map_err(serde::de::Error::custom) } #[derive(Debug, Clone)] From 
577d843e117b88bc157e6304e7ce14c1ea0cd1c6 Mon Sep 17 00:00:00 2001 From: elizabeth Date: Wed, 9 Jul 2025 15:22:35 -0400 Subject: [PATCH 16/38] spawn message handler --- crates/worker/src/p2p/mod.rs | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/crates/worker/src/p2p/mod.rs b/crates/worker/src/p2p/mod.rs index 6a851c01..c2af2bbd 100644 --- a/crates/worker/src/p2p/mod.rs +++ b/crates/worker/src/p2p/mod.rs @@ -1,5 +1,6 @@ use anyhow::Context as _; use anyhow::Result; +use futures::stream::FuturesUnordered; use p2p::InviteRequestUrl; use p2p::Node; use p2p::NodeBuilder; @@ -71,6 +72,8 @@ impl Service { } pub(crate) async fn run(self) { + use futures::StreamExt as _; + let Self { node: _, mut incoming_messages, @@ -78,17 +81,24 @@ impl Service { context, } = self; + let mut message_handlers = FuturesUnordered::new(); + loop { tokio::select! { _ = cancellation_token.cancelled() => { break; } Some(message) = incoming_messages.recv() => { - // TODO: spawn and store handles - if let Err(e) = handle_incoming_message(message, context.clone()) - .await { + let context = context.clone(); + let handle = tokio::task::spawn( + handle_incoming_message(message, context) + ); + message_handlers.push(handle); + } + Some(res) = message_handlers.next() => { + if let Err(e) = res { tracing::error!("failed to handle incoming message: {e}"); - } + } } } } From 4285eaaf7d208a10321fb2a86ba1d875dbd952b5 Mon Sep 17 00:00:00 2001 From: elizabeth Date: Wed, 9 Jul 2025 16:40:55 -0400 Subject: [PATCH 17/38] add dial channel to p2p node; impl validator libp2p node --- Cargo.lock | 1 + Cargo.toml | 2 + crates/p2p/src/lib.rs | 40 +++- crates/validator/Cargo.toml | 2 + crates/validator/src/p2p/mod.rs | 380 ++++++++++++++++++++++++++++++++ 5 files changed, 414 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ae652ad4..23c65154 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9452,6 +9452,7 @@ dependencies = [ "log", "mockito", "nalgebra", + 
"p2p", "prometheus 0.14.0", "rand 0.8.5", "rand 0.9.1", diff --git a/Cargo.toml b/Cargo.toml index 4279f156..d4ca7ab8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,6 +12,8 @@ resolver = "2" [workspace.dependencies] shared = { path = "crates/shared" } +p2p = { path = "crates/p2p" } + actix-web = "4.9.0" clap = { version = "4.5.27", features = ["derive"] } serde = { version = "1.0.219", features = ["derive"] } diff --git a/crates/p2p/src/lib.rs b/crates/p2p/src/lib.rs index 6e2efca3..4a2b176e 100644 --- a/crates/p2p/src/lib.rs +++ b/crates/p2p/src/lib.rs @@ -1,6 +1,5 @@ use anyhow::Context; use anyhow::Result; -use libp2p::futures::stream::FuturesUnordered; use libp2p::noise; use libp2p::swarm::SwarmEvent; use libp2p::tcp; @@ -23,6 +22,9 @@ pub type Libp2pIncomingMessage = libp2p::request_response::Message; pub type PeerId = libp2p::PeerId; pub type Multiaddr = libp2p::Multiaddr; +pub type Keypair = libp2p::identity::Keypair; +pub type DialSender = + tokio::sync::mpsc::Sender<(Vec, tokio::sync::oneshot::Sender>)>; pub const PRIME_STREAM_PROTOCOL: libp2p::StreamProtocol = libp2p::StreamProtocol::new("/prime/1.0.0"); @@ -36,6 +38,9 @@ pub struct Node { bootnodes: Vec, cancellation_token: tokio_util::sync::CancellationToken, + dial_rx: + tokio::sync::mpsc::Receiver<(Vec, tokio::sync::oneshot::Sender>)>, + // channel for sending incoming messages to the consumer of this library incoming_message_tx: tokio::sync::mpsc::Sender, @@ -73,6 +78,7 @@ impl Node { mut swarm, bootnodes, cancellation_token, + mut dial_rx, incoming_message_tx, mut outgoing_message_rx, } = self; @@ -83,17 +89,12 @@ impl Node { .context("swarm failed to listen on multiaddr")?; } - let futures = FuturesUnordered::new(); for bootnode in bootnodes { - futures.push(swarm.dial(bootnode)) - } - let results: Vec<_> = futures.into_iter().collect(); - for result in results { - match result { + match swarm.dial(bootnode.clone()) { Ok(_) => {} Err(e) => { - // TODO: log this error - println!("failed to dial 
bootnode: {e:?}"); + // log error + println!("failed to dial bootnode {bootnode}: {e:?}"); } } } @@ -104,6 +105,19 @@ impl Node { println!("cancellation token triggered, shutting down node"); break Ok(()); } + Some((addrs, res_tx)) = dial_rx.recv() => { + let mut res = Ok(()); + for addr in addrs { + match swarm.dial(addr.clone()) { + Ok(_) => {} + Err(e) => { + res = Err(anyhow::anyhow!("failed to dial {addr}: {e:?}")); + break; + } + } + } + let _ = res_tx.send(res); + } Some(message) = outgoing_message_rx.recv() => { match message { OutgoingMessage::Request((peer, request)) => { @@ -254,6 +268,7 @@ impl NodeBuilder { self, ) -> Result<( Node, + DialSender, tokio::sync::mpsc::Receiver, tokio::sync::mpsc::Sender, )> { @@ -295,6 +310,7 @@ impl NodeBuilder { listen_addrs.push(listen_addr); } + let (dial_tx, dial_rx) = tokio::sync::mpsc::channel(100); let (incoming_message_tx, incoming_message_rx) = tokio::sync::mpsc::channel(100); let (outgoing_message_tx, outgoing_message_rx) = tokio::sync::mpsc::channel(100); @@ -304,10 +320,12 @@ impl NodeBuilder { swarm, listen_addrs, bootnodes, + dial_rx, incoming_message_tx, outgoing_message_rx, cancellation_token: cancellation_token.unwrap_or_default(), }, + dial_tx, incoming_message_rx, outgoing_message_tx, )) @@ -334,11 +352,11 @@ mod test { #[tokio::test] async fn two_nodes_can_connect_and_do_request_response() { - let (node1, mut incoming_message_rx1, outgoing_message_tx1) = + let (node1, _, mut incoming_message_rx1, outgoing_message_tx1) = NodeBuilder::new().with_get_task_logs().try_build().unwrap(); let node1_peer_id = node1.peer_id(); - let (node2, mut incoming_message_rx2, outgoing_message_tx2) = NodeBuilder::new() + let (node2, _, mut incoming_message_rx2, outgoing_message_tx2) = NodeBuilder::new() .with_get_task_logs() .with_bootnodes(node1.multiaddrs()) .try_build() diff --git a/crates/validator/Cargo.toml b/crates/validator/Cargo.toml index db3694ca..4143903d 100644 --- a/crates/validator/Cargo.toml +++ 
b/crates/validator/Cargo.toml @@ -7,6 +7,8 @@ edition.workspace = true workspace = true [dependencies] +p2p = { workspace = true} + actix-web = { workspace = true } alloy = { workspace = true } anyhow = { workspace = true } diff --git a/crates/validator/src/p2p/mod.rs b/crates/validator/src/p2p/mod.rs index 33dad50c..378b1080 100644 --- a/crates/validator/src/p2p/mod.rs +++ b/crates/validator/src/p2p/mod.rs @@ -1,3 +1,383 @@ pub(crate) mod client; pub use client::P2PClient; + +use anyhow::{bail, Context as _, Result}; +use futures::stream::FuturesUnordered; +use p2p::{ + IncomingMessage, Libp2pIncomingMessage, Node, NodeBuilder, OutgoingMessage, PeerId, + ValidatorAuthenticationInitiationRequest, ValidatorAuthenticationResponse, + ValidatorAuthenticationSolutionRequest, +}; +use shared::web3::wallet::Wallet; +use std::collections::HashMap; +use std::sync::Arc; +use tokio::sync::mpsc::{Receiver, Sender}; +use tokio::sync::RwLock; +use tokio_util::sync::CancellationToken; + +pub(crate) struct Service { + node: Node, + dial_tx: p2p::DialSender, + incoming_messages: Receiver, + hardware_challenge_rx: Receiver, + cancellation_token: CancellationToken, + context: Context, +} + +pub(crate) struct HardwareChallengeRequest { + worker_wallet_address: alloy::primitives::Address, + worker_p2p_id: String, + worker_addresses: Vec, + challenge: p2p::ChallengeRequest, +} + +impl Service { + #[allow(clippy::too_many_arguments)] + pub(crate) fn new( + keypair: p2p::Keypair, + port: u16, + cancellation_token: CancellationToken, + wallet: Wallet, + ) -> Result<(Self, Sender)> { + let (node, dial_tx, incoming_messages, outgoing_messages) = + build_p2p_node(keypair, port, cancellation_token.clone()) + .context("failed to build p2p node")?; + let (hardware_challenge_tx, hardware_challenge_rx) = tokio::sync::mpsc::channel(100); + + Ok(( + Self { + node, + dial_tx, + incoming_messages, + hardware_challenge_rx, + cancellation_token, + context: Context::new(outgoing_messages, wallet), + }, + 
hardware_challenge_tx, + )) + } + + pub(crate) fn peer_id(&self) -> PeerId { + self.node.peer_id() + } + + pub(crate) fn listen_addrs(&self) -> &[p2p::Multiaddr] { + self.node.listen_addrs() + } + + pub(crate) async fn run(self) { + use futures::StreamExt as _; + + let Self { + node: _, + dial_tx, + mut incoming_messages, + mut hardware_challenge_rx, + cancellation_token, + context, + } = self; + + let mut message_handlers = FuturesUnordered::new(); + + loop { + tokio::select! { + _ = cancellation_token.cancelled() => { + break; + } + Some(message) = hardware_challenge_rx.recv() => { + if let Err(e) = handle_outgoing_hardware_challenge(message, dial_tx.clone(), context.clone()) + .await { + log::error!("failed to handle outgoing hardware challenge: {e}"); + } + } + Some(message) = incoming_messages.recv() => { + let context = context.clone(); + let handle = tokio::task::spawn( + handle_incoming_message(message, context) + ); + message_handlers.push(handle); + } + Some(res) = message_handlers.next() => { + if let Err(e) = res { + log::error!("failed to handle incoming message: {e}"); + } + } + } + } + } +} + +fn build_p2p_node( + keypair: p2p::Keypair, + port: u16, + cancellation_token: CancellationToken, +) -> Result<( + Node, + p2p::DialSender, + Receiver, + Sender, +)> { + NodeBuilder::new() + .with_keypair(keypair) + .with_port(port) + .with_validator_authentication() + .with_hardware_challenge() + .with_cancellation_token(cancellation_token) + .try_build() +} + +#[derive(Clone)] +struct Context { + outgoing_messages: Sender, + ongoing_auth_requests: Arc>>, + wallet: Wallet, +} + +#[derive(Debug, Clone)] +struct OngoingHardwareChallenge { + worker_wallet_address: alloy::primitives::Address, + auth_challenge_request_message: String, + hardware_challenge: p2p::ChallengeRequest, +} + +impl Context { + fn new(outgoing_messages: Sender, wallet: Wallet) -> Self { + Self { + outgoing_messages, + ongoing_auth_requests: Arc::new(RwLock::new(HashMap::new())), + wallet, + 
} + } +} + +async fn handle_outgoing_hardware_challenge( + request: HardwareChallengeRequest, + dial_tx: p2p::DialSender, + context: Context, +) -> Result<()> { + use rand_v8::rngs::OsRng; + use rand_v8::Rng as _; + use std::str::FromStr as _; + + let HardwareChallengeRequest { + worker_wallet_address, + worker_p2p_id, + worker_addresses, + challenge, + } = request; + + log::debug!( + "sending hardware challenge to {} with addresses {:?}", + worker_p2p_id, + worker_addresses + ); + + // first, dial the worker + let worker_p2p_id = + PeerId::from_str(&worker_p2p_id).context("failed to parse worker p2p id")?; + + // ensure there's no ongoing challenge + // use write-lock to make this atomic until we finish sending the auth request and writing to the map + let mut ongoing_auth_requests = context.ongoing_auth_requests.write().await; + if ongoing_auth_requests.contains_key(&worker_p2p_id) { + bail!( + "ongoing hardware challenge for {} already exists", + worker_p2p_id + ); + } + + let multiaddrs = worker_addresses + .iter() + .filter_map(|addr| { + Some( + p2p::Multiaddr::from_str(addr) + .ok()? + .with_p2p(worker_p2p_id.clone()) + .ok()?, + ) + }) + .collect::>(); + if multiaddrs.is_empty() { + bail!("no valid multiaddrs for worker p2p id {worker_p2p_id}"); + } + + // TODO: we can improve this by checking if we're already connected to the peer before dialing + let (res_tx, res_rx) = tokio::sync::oneshot::channel(); + dial_tx + .send((multiaddrs, res_tx)) + .await + .context("failed to send dial request")?; + res_rx + .await + .context("failed to receive dial response")? 
+ .context("failed to dial worker")?; + + // create the authentication challenge request message + let challenge_bytes: [u8; 32] = OsRng.gen(); + let auth_challenge_message: String = hex::encode(challenge_bytes); + + let req: p2p::Request = ValidatorAuthenticationInitiationRequest { + message: auth_challenge_message.clone(), + } + .into(); + let outgoing_message = req.into_outgoing_message(worker_p2p_id.clone()); + log::debug!( + "sending ValidatorAuthenticationInitiationRequest to {}", + worker_p2p_id + ); + context + .outgoing_messages + .send(outgoing_message) + .await + .context("failed to send outgoing message")?; + + // store the ongoing hardware challenge + let ongoing_challenge = OngoingHardwareChallenge { + worker_wallet_address, + auth_challenge_request_message: auth_challenge_message.clone(), + hardware_challenge: challenge, + }; + + ongoing_auth_requests.insert(worker_p2p_id.clone(), ongoing_challenge); + Ok(()) +} + +async fn handle_incoming_message(message: IncomingMessage, context: Context) -> Result<()> { + match message.message { + Libp2pIncomingMessage::Request { + request_id: _, + request, + channel: _, + } => { + log::error!( + "validator should not receive incoming requests: {request:?} from {}", + message.peer + ); + } + Libp2pIncomingMessage::Response { + request_id: _, + response, + } => { + log::debug!("received incoming response {response:?}"); + handle_incoming_response(message.peer, response, context) + .await + .context("failed to handle incoming response")?; + } + } + Ok(()) +} + +async fn handle_incoming_response( + from: PeerId, + response: p2p::Response, + context: Context, +) -> Result<()> { + match response { + p2p::Response::ValidatorAuthentication(resp) => { + log::debug!("received ValidatorAuthenticationSolutionResponse from {from}: {resp:?}"); + handle_validation_authentication_response(from, resp, context) + .await + .context("failed to handle validator authentication response")?; + } + p2p::Response::HardwareChallenge(resp) 
=> { + log::debug!("received HardwareChallengeResponse from {from}: {resp:?}"); + // TODO + } + p2p::Response::Invite(_) => { + log::error!("validator should not receive `Invite` responses: from {from}"); + } + p2p::Response::GetTaskLogs(_) => { + log::error!("validator should not receive `GetTaskLogs` responses: from {from}"); + } + p2p::Response::Restart(_) => { + log::error!("validator should not receive `Restart` responses: from {from}"); + } + p2p::Response::General(_) => { + todo!() + } + } + + Ok(()) +} + +async fn handle_validation_authentication_response( + from: PeerId, + response: p2p::ValidatorAuthenticationResponse, + context: Context, +) -> Result<()> { + use shared::security::request_signer::sign_message; + use std::str::FromStr as _; + + match response { + ValidatorAuthenticationResponse::Initiation(req) => { + let ongoing_auth_requests = context.ongoing_auth_requests.read().await; + let Some(ongoing_challenge) = ongoing_auth_requests.get(&from) else { + bail!( + "no ongoing hardware challenge for peer {from}, cannot handle ValidatorAuthenticationInitiationResponse" + ); + }; + + let Ok(parsed_signature) = alloy::primitives::Signature::from_str(&req.signature) + else { + bail!("Failed to parse signature from server"); + }; + + // recover address from the challenge message that the server signed + let Ok(recovered_address) = parsed_signature + .recover_address_from_msg(&ongoing_challenge.auth_challenge_request_message) + else { + bail!("Failed to recover address from server signature") + }; + + // verify the recovered address matches the expected worker wallet address + if recovered_address != ongoing_challenge.worker_wallet_address { + bail!( + "Server address verification failed: expected {}, got {recovered_address}", + ongoing_challenge.worker_wallet_address, + ) + } + + log::debug!("auth challenge initiation response received from node: {from}"); + let signature = sign_message(&req.message, &context.wallet).await.unwrap(); + + let req: 
p2p::Request = ValidatorAuthenticationSolutionRequest { signature }.into(); + let req = req.into_outgoing_message(from); + context + .outgoing_messages + .send(req) + .await + .context("failed to send outgoing message")?; + } + ValidatorAuthenticationResponse::Solution(req) => { + let mut ongoing_auth_requests = context.ongoing_auth_requests.write().await; + let Some(ongoing_challenge) = ongoing_auth_requests.remove(&from) else { + bail!( + "no ongoing hardware challenge for peer {from}, cannot handle ValidatorAuthenticationSolutionResponse" + ); + }; + + match req { + p2p::ValidatorAuthenticationSolutionResponse::Granted => {} + p2p::ValidatorAuthenticationSolutionResponse::Rejected => { + log::debug!("auth challenge rejected by node: {from}"); + return Ok(()); + } + } + + // auth was granted, finally send the hardware challenge + let req: p2p::Request = p2p::HardwareChallengeRequest { + challenge: ongoing_challenge.hardware_challenge, + timestamp: std::time::SystemTime::now(), + } + .into(); + let req = req.into_outgoing_message(from); + context + .outgoing_messages + .send(req) + .await + .context("failed to send outgoing message")?; + } + } + Ok(()) +} From 75038851adb9bd5839ec852c395b9278ade4e94d Mon Sep 17 00:00:00 2001 From: elizabeth Date: Wed, 9 Jul 2025 16:54:12 -0400 Subject: [PATCH 18/38] fully implement hardware challenge flow --- crates/validator/src/p2p/mod.rs | 41 +++++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/crates/validator/src/p2p/mod.rs b/crates/validator/src/p2p/mod.rs index 378b1080..49cb5b60 100644 --- a/crates/validator/src/p2p/mod.rs +++ b/crates/validator/src/p2p/mod.rs @@ -25,13 +25,6 @@ pub(crate) struct Service { context: Context, } -pub(crate) struct HardwareChallengeRequest { - worker_wallet_address: alloy::primitives::Address, - worker_p2p_id: String, - worker_addresses: Vec, - challenge: p2p::ChallengeRequest, -} - impl Service { #[allow(clippy::too_many_arguments)] pub(crate) fn 
new( @@ -127,18 +120,29 @@ fn build_p2p_node( .try_build() } +pub(crate) struct HardwareChallengeRequest { + worker_wallet_address: alloy::primitives::Address, + worker_p2p_id: String, + worker_addresses: Vec, + challenge: p2p::ChallengeRequest, + response_tx: tokio::sync::oneshot::Sender, +} + #[derive(Clone)] struct Context { outgoing_messages: Sender, - ongoing_auth_requests: Arc>>, + ongoing_auth_requests: Arc>>, + ongoing_hardware_challenges: + Arc>>>, wallet: Wallet, } -#[derive(Debug, Clone)] -struct OngoingHardwareChallenge { +#[derive(Debug)] +struct OngoingAuthChallenge { worker_wallet_address: alloy::primitives::Address, auth_challenge_request_message: String, hardware_challenge: p2p::ChallengeRequest, + hardware_challenge_response_tx: tokio::sync::oneshot::Sender, } impl Context { @@ -146,6 +150,7 @@ impl Context { Self { outgoing_messages, ongoing_auth_requests: Arc::new(RwLock::new(HashMap::new())), + ongoing_hardware_challenges: Arc::new(RwLock::new(HashMap::new())), wallet, } } @@ -165,6 +170,7 @@ async fn handle_outgoing_hardware_challenge( worker_p2p_id, worker_addresses, challenge, + response_tx, } = request; log::debug!( @@ -233,10 +239,11 @@ async fn handle_outgoing_hardware_challenge( .context("failed to send outgoing message")?; // store the ongoing hardware challenge - let ongoing_challenge = OngoingHardwareChallenge { + let ongoing_challenge = OngoingAuthChallenge { worker_wallet_address, auth_challenge_request_message: auth_challenge_message.clone(), hardware_challenge: challenge, + hardware_challenge_response_tx: response_tx, }; ongoing_auth_requests.insert(worker_p2p_id.clone(), ongoing_challenge); @@ -282,7 +289,13 @@ async fn handle_incoming_response( } p2p::Response::HardwareChallenge(resp) => { log::debug!("received HardwareChallengeResponse from {from}: {resp:?}"); - // TODO + let mut ongoing_hardware_challenges = context.ongoing_hardware_challenges.write().await; + let Some(response_tx) = ongoing_hardware_challenges.remove(&from) 
else { + bail!( + "no ongoing hardware challenge for peer {from}, cannot handle HardwareChallengeResponse" + ); + }; + let _ = response_tx.send(resp.response); // timestamp is silently dropped, is it actually used anywhere? } p2p::Response::Invite(_) => { log::error!("validator should not receive `Invite` responses: from {from}"); @@ -377,6 +390,10 @@ async fn handle_validation_authentication_response( .send(req) .await .context("failed to send outgoing message")?; + + let mut ongoing_hardware_challenges = context.ongoing_hardware_challenges.write().await; + ongoing_hardware_challenges + .insert(from, ongoing_challenge.hardware_challenge_response_tx); } } Ok(()) From d32f540e745810a43c38116b1d3bc19278529f56 Mon Sep 17 00:00:00 2001 From: elizabeth Date: Wed, 9 Jul 2025 17:15:28 -0400 Subject: [PATCH 19/38] upddate validator main to use libp2p node --- Cargo.lock | 1 - crates/validator/Cargo.toml | 1 - crates/validator/src/lib.rs | 2 +- crates/validator/src/main.rs | 31 ++++--- crates/validator/src/p2p/client.rs | 89 ------------------- crates/validator/src/p2p/mod.rs | 55 ++++-------- crates/validator/src/validators/hardware.rs | 66 +++++--------- .../src/validators/hardware_challenge.rs | 85 +++++++++--------- 8 files changed, 105 insertions(+), 225 deletions(-) delete mode 100644 crates/validator/src/p2p/client.rs diff --git a/Cargo.lock b/Cargo.lock index 23c65154..a4a2af90 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9447,7 +9447,6 @@ dependencies = [ "env_logger", "futures", "hex", - "iroh", "lazy_static", "log", "mockito", diff --git a/crates/validator/Cargo.toml b/crates/validator/Cargo.toml index 4143903d..043c1558 100644 --- a/crates/validator/Cargo.toml +++ b/crates/validator/Cargo.toml @@ -18,7 +18,6 @@ directories = { workspace = true } env_logger = { workspace = true } futures = { workspace = true } hex = { workspace = true } -iroh = { workspace = true } rand_v8 = { workspace = true } lazy_static = "1.5.0" log = { workspace = true } diff --git 
a/crates/validator/src/lib.rs b/crates/validator/src/lib.rs index 760af2d1..9fac5ce8 100644 --- a/crates/validator/src/lib.rs +++ b/crates/validator/src/lib.rs @@ -5,7 +5,7 @@ mod validators; pub use metrics::export_metrics; pub use metrics::MetricsContext; -pub use p2p::P2PClient; +pub use p2p::Service as P2PService; pub use store::redis::RedisStore; pub use validators::hardware::HardwareValidator; pub use validators::synthetic_data::types::InvalidationType; diff --git a/crates/validator/src/main.rs b/crates/validator/src/main.rs index 55b3900d..d17f5004 100644 --- a/crates/validator/src/main.rs +++ b/crates/validator/src/main.rs @@ -23,7 +23,7 @@ use tokio_util::sync::CancellationToken; use url::Url; use validator::{ - export_metrics, HardwareValidator, InvalidationType, MetricsContext, P2PClient, RedisStore, + export_metrics, HardwareValidator, InvalidationType, MetricsContext, P2PService, RedisStore, SyntheticDataValidator, }; @@ -196,6 +196,10 @@ struct Args { /// Redis URL #[arg(long, default_value = "redis://localhost:6380")] redis_url: String, + + /// Libp2p port + #[arg(long, default_value = "4003")] + libp2p_port: u16, } #[tokio::main] @@ -269,19 +273,27 @@ async fn main() -> anyhow::Result<()> { MetricsContext::new(validator_wallet.address().to_string(), args.pool_id.clone()); // Initialize P2P client if enabled - let p2p_client = { - match P2PClient::new(validator_wallet.clone()).await { - Ok(client) => { - info!("P2P client initialized for testing"); - Some(client) + let keypair = p2p::Keypair::generate_ed25519(); + let (p2p_service, hardware_challenge_tx) = { + match P2PService::new( + keypair, + args.libp2p_port, + cancellation_token.clone(), + validator_wallet.clone(), + ) { + Ok(res) => { + info!("p2p service initialized successfully"); + res } Err(e) => { - error!("Failed to initialize P2P client: {e}"); - None + error!("failed to initialize p2p service: {e}"); + std::process::exit(1); } } }; + tokio::task::spawn(p2p_service.run()); + if let 
Some(pool_id) = args.pool_id.clone() { let pool = match contracts .compute_pool @@ -308,8 +320,7 @@ async fn main() -> anyhow::Result<()> { let contracts = contract_builder.build().unwrap(); - let hardware_validator = - HardwareValidator::new(&validator_wallet, contracts.clone(), p2p_client.as_ref()); + let hardware_validator = HardwareValidator::new(contracts.clone(), hardware_challenge_tx); let synthetic_validator = if let Some(pool_id) = args.pool_id.clone() { let penalty = U256::from(args.validator_penalty) * Unit::ETHER.wei(); diff --git a/crates/validator/src/p2p/client.rs b/crates/validator/src/p2p/client.rs deleted file mode 100644 index a0b21db1..00000000 --- a/crates/validator/src/p2p/client.rs +++ /dev/null @@ -1,89 +0,0 @@ -use alloy::primitives::Address; -use anyhow::Result; -use log::info; -use rand_v8::Rng; -use shared::models::challenge::{ChallengeRequest, ChallengeResponse}; -use shared::p2p::{client::P2PClient as SharedP2PClient, messages::P2PMessage}; -use shared::web3::wallet::Wallet; -use std::time::SystemTime; - -pub struct P2PClient { - shared_client: SharedP2PClient, -} - -impl P2PClient { - pub async fn new(wallet: Wallet) -> Result { - let shared_client = SharedP2PClient::new(wallet).await?; - Ok(Self { shared_client }) - } - - pub async fn ping_worker( - &self, - worker_wallet_address: Address, - worker_p2p_id: &str, - worker_addresses: &[String], - ) -> Result { - let nonce = rand_v8::thread_rng().gen::(); - - let response = self - .shared_client - .send_request( - worker_p2p_id, - worker_addresses, - worker_wallet_address, - P2PMessage::Ping { - timestamp: SystemTime::now(), - nonce, - }, - 10, - ) - .await?; - - match response { - P2PMessage::Pong { - nonce: returned_nonce, - .. 
- } => { - if returned_nonce == nonce { - info!("Received valid pong from worker {worker_p2p_id} with nonce: {nonce}"); - Ok(nonce) - } else { - Err(anyhow::anyhow!("Invalid nonce in pong response")) - } - } - _ => Err(anyhow::anyhow!("Unexpected response type for ping")), - } - } - - pub async fn send_hardware_challenge( - &self, - worker_wallet_address: Address, - worker_p2p_id: &str, - worker_addresses: &[String], - challenge: ChallengeRequest, - ) -> Result { - let response = self - .shared_client - .send_request( - worker_p2p_id, - worker_addresses, - worker_wallet_address, - P2PMessage::HardwareChallenge { - challenge, - timestamp: SystemTime::now(), - }, - 30, - ) - .await?; - - match response { - P2PMessage::HardwareChallengeResponse { response, .. } => { - info!("Received hardware challenge response from worker {worker_p2p_id}"); - Ok(response) - } - _ => Err(anyhow::anyhow!( - "Unexpected response type for hardware challenge" - )), - } - } -} diff --git a/crates/validator/src/p2p/mod.rs b/crates/validator/src/p2p/mod.rs index 49cb5b60..e756d33f 100644 --- a/crates/validator/src/p2p/mod.rs +++ b/crates/validator/src/p2p/mod.rs @@ -1,7 +1,3 @@ -pub(crate) mod client; - -pub use client::P2PClient; - use anyhow::{bail, Context as _, Result}; use futures::stream::FuturesUnordered; use p2p::{ @@ -16,8 +12,8 @@ use tokio::sync::mpsc::{Receiver, Sender}; use tokio::sync::RwLock; use tokio_util::sync::CancellationToken; -pub(crate) struct Service { - node: Node, +pub struct Service { + _node: Node, dial_tx: p2p::DialSender, incoming_messages: Receiver, hardware_challenge_rx: Receiver, @@ -27,7 +23,7 @@ pub(crate) struct Service { impl Service { #[allow(clippy::too_many_arguments)] - pub(crate) fn new( + pub fn new( keypair: p2p::Keypair, port: u16, cancellation_token: CancellationToken, @@ -40,7 +36,7 @@ impl Service { Ok(( Self { - node, + _node: node, dial_tx, incoming_messages, hardware_challenge_rx, @@ -51,19 +47,11 @@ impl Service { )) } - pub(crate) fn 
peer_id(&self) -> PeerId { - self.node.peer_id() - } - - pub(crate) fn listen_addrs(&self) -> &[p2p::Multiaddr] { - self.node.listen_addrs() - } - - pub(crate) async fn run(self) { + pub async fn run(self) { use futures::StreamExt as _; let Self { - node: _, + _node, dial_tx, mut incoming_messages, mut hardware_challenge_rx, @@ -120,12 +108,12 @@ fn build_p2p_node( .try_build() } -pub(crate) struct HardwareChallengeRequest { - worker_wallet_address: alloy::primitives::Address, - worker_p2p_id: String, - worker_addresses: Vec, - challenge: p2p::ChallengeRequest, - response_tx: tokio::sync::oneshot::Sender, +pub struct HardwareChallengeRequest { + pub(crate) worker_wallet_address: alloy::primitives::Address, + pub(crate) worker_p2p_id: String, + pub(crate) worker_addresses: Vec, + pub(crate) challenge: p2p::ChallengeRequest, + pub(crate) response_tx: tokio::sync::oneshot::Sender, } #[derive(Clone)] @@ -174,9 +162,7 @@ async fn handle_outgoing_hardware_challenge( } = request; log::debug!( - "sending hardware challenge to {} with addresses {:?}", - worker_p2p_id, - worker_addresses + "sending hardware challenge to {worker_p2p_id} with addresses {worker_addresses:?}" ); // first, dial the worker @@ -196,12 +182,10 @@ async fn handle_outgoing_hardware_challenge( let multiaddrs = worker_addresses .iter() .filter_map(|addr| { - Some( - p2p::Multiaddr::from_str(addr) + p2p::Multiaddr::from_str(addr) .ok()? 
- .with_p2p(worker_p2p_id.clone()) - .ok()?, - ) + .with_p2p(worker_p2p_id) + .ok() }) .collect::>(); if multiaddrs.is_empty() { @@ -227,10 +211,9 @@ async fn handle_outgoing_hardware_challenge( message: auth_challenge_message.clone(), } .into(); - let outgoing_message = req.into_outgoing_message(worker_p2p_id.clone()); + let outgoing_message = req.into_outgoing_message(worker_p2p_id); log::debug!( - "sending ValidatorAuthenticationInitiationRequest to {}", - worker_p2p_id + "sending ValidatorAuthenticationInitiationRequest to {worker_p2p_id}" ); context .outgoing_messages @@ -246,7 +229,7 @@ async fn handle_outgoing_hardware_challenge( hardware_challenge_response_tx: response_tx, }; - ongoing_auth_requests.insert(worker_p2p_id.clone(), ongoing_challenge); + ongoing_auth_requests.insert(worker_p2p_id, ongoing_challenge); Ok(()) } diff --git a/crates/validator/src/validators/hardware.rs b/crates/validator/src/validators/hardware.rs index 00736d34..877861da 100644 --- a/crates/validator/src/validators/hardware.rs +++ b/crates/validator/src/validators/hardware.rs @@ -1,15 +1,13 @@ use alloy::primitives::Address; +use anyhow::bail; use anyhow::Result; use log::{debug, error, info}; use shared::{ models::node::DiscoveryNode, - web3::{ - contracts::core::builder::Contracts, - wallet::{Wallet, WalletProvider}, - }, + web3::{contracts::core::builder::Contracts, wallet::WalletProvider}, }; -use crate::p2p::client::P2PClient; +use crate::p2p::HardwareChallengeRequest; use crate::validators::hardware_challenge::HardwareChallenge; /// Hardware validator implementation @@ -17,35 +15,27 @@ use crate::validators::hardware_challenge::HardwareChallenge; /// NOTE: This is a temporary implementation that will be replaced with a proper /// hardware validator in the near future. The current implementation only performs /// basic matrix multiplication challenges and does not verify actual hardware specs. 
-pub struct HardwareValidator<'a> { - wallet: &'a Wallet, +pub struct HardwareValidator { contracts: Contracts, - p2p_client: Option<&'a P2PClient>, + challenge_tx: tokio::sync::mpsc::Sender, } -impl<'a> HardwareValidator<'a> { +impl HardwareValidator { pub fn new( - wallet: &'a Wallet, contracts: Contracts, - p2p_client: Option<&'a P2PClient>, + challenge_tx: tokio::sync::mpsc::Sender, ) -> Self { Self { - wallet, contracts, - p2p_client, + challenge_tx, } } - async fn validate_node( - _wallet: &'a Wallet, - contracts: Contracts, - p2p_client: Option<&'a P2PClient>, - node: DiscoveryNode, - ) -> Result<()> { + async fn validate_node(&self, node: DiscoveryNode) -> Result<()> { let node_address = match node.id.trim_start_matches("0x").parse::
() { Ok(addr) => addr, Err(e) => { - return Err(anyhow::anyhow!("Failed to parse node address: {}", e)); + bail!("failed to parse node address: {e:?}"); } }; @@ -56,30 +46,22 @@ impl<'a> HardwareValidator<'a> { { Ok(addr) => addr, Err(e) => { - return Err(anyhow::anyhow!("Failed to parse provider address: {}", e)); + bail!("failed to parse provider address: {e:?}"); } }; // Perform hardware challenge - if let Some(p2p_client) = p2p_client { - let hardware_challenge = HardwareChallenge::new(p2p_client); - let challenge_result = hardware_challenge.challenge_node(&node).await; - - if let Err(e) = challenge_result { - println!("Challenge failed for node: {}, error: {}", node.id, e); - error!("Challenge failed for node: {}, error: {}", node.id, e); - return Err(anyhow::anyhow!("Failed to challenge node: {}", e)); - } - } else { - debug!( - "P2P client not available, skipping hardware challenge for node {}", - node.id - ); + let hardware_challenge = HardwareChallenge::new(self.challenge_tx.clone()); + let challenge_result = hardware_challenge.challenge_node(&node).await; + + if let Err(e) = challenge_result { + bail!("failed to challenge node: {e:?}"); } debug!("Sending validation transaction for node {}", node.id); - if let Err(e) = contracts + if let Err(e) = self + .contracts .prime_network .validate_node(provider_address, node_address) .await @@ -100,17 +82,11 @@ impl<'a> HardwareValidator<'a> { debug!("Non validated nodes: {non_validated:?}"); info!("Starting validation for {} nodes", non_validated.len()); - let contracts = self.contracts.clone(); - let wallet = self.wallet; - let p2p_client = self.p2p_client; - // Process non validated nodes sequentially as simple fix // to avoid nonce conflicts for now. 
Will sophisticate this in the future for node in non_validated { let node_id = node.id.clone(); - match HardwareValidator::validate_node(wallet, contracts.clone(), p2p_client, node) - .await - { + match self.validate_node(node).await { Ok(_) => (), Err(e) => { error!("Failed to validate node {node_id}: {e}"); @@ -134,7 +110,6 @@ mod tests { async fn test_challenge_node() { let coordinator_key = "0xdbda1821b80551c9d65939329250298aa3472ba22feea921c0cf5d620ea67b97"; let rpc_url: Url = Url::parse("http://localhost:8545").unwrap(); - let coordinator_wallet = Arc::new(Wallet::new(coordinator_key, rpc_url).unwrap()); let contracts = ContractBuilder::new(coordinator_wallet.provider()) @@ -145,7 +120,8 @@ mod tests { .build() .unwrap(); - let validator = HardwareValidator::new(&coordinator_wallet, contracts, None); + let (tx, _rx) = tokio::sync::mpsc::channel(100); + let validator = HardwareValidator::new(contracts, tx); let fake_discovery_node1 = DiscoveryNode { is_validated: false, diff --git a/crates/validator/src/validators/hardware_challenge.rs b/crates/validator/src/validators/hardware_challenge.rs index c881c542..de04a96a 100644 --- a/crates/validator/src/validators/hardware_challenge.rs +++ b/crates/validator/src/validators/hardware_challenge.rs @@ -1,40 +1,38 @@ -use crate::p2p::client::P2PClient; use alloy::primitives::Address; -use anyhow::{Error, Result}; +use anyhow::{bail, Context as _, Result}; use log::{error, info}; use rand::{rng, Rng}; -use shared::models::{ - challenge::{calc_matrix, ChallengeRequest, FixedF64}, - node::DiscoveryNode, -}; +use shared::models::node::DiscoveryNode; use std::str::FromStr; -pub(crate) struct HardwareChallenge<'a> { - p2p_client: &'a P2PClient, +use crate::p2p::HardwareChallengeRequest; + +pub(crate) struct HardwareChallenge { + challenge_tx: tokio::sync::mpsc::Sender, } -impl<'a> HardwareChallenge<'a> { - pub(crate) fn new(p2p_client: &'a P2PClient) -> Self { - Self { p2p_client } +impl HardwareChallenge { + pub(crate) fn 
new(challenge_tx: tokio::sync::mpsc::Sender) -> Self { + Self { challenge_tx } } - pub(crate) async fn challenge_node(&self, node: &DiscoveryNode) -> Result { + pub(crate) async fn challenge_node(&self, node: &DiscoveryNode) -> Result<()> { // Check if node has P2P ID and addresses let p2p_id = node .node .worker_p2p_id - .as_ref() + .clone() .ok_or_else(|| anyhow::anyhow!("Node {} does not have P2P ID", node.id))?; let p2p_addresses = node .node .worker_p2p_addresses - .as_ref() + .clone() .ok_or_else(|| anyhow::anyhow!("Node {} does not have P2P addresses", node.id))?; // create random challenge matrix let challenge_matrix = self.random_challenge(3, 3, 3, 3); - let challenge_expected = calc_matrix(&challenge_matrix); + let challenge_expected = p2p::calc_matrix(&challenge_matrix); // Add timestamp to the challenge let current_time = std::time::SystemTime::now() @@ -47,34 +45,35 @@ impl<'a> HardwareChallenge<'a> { let node_address = Address::from_str(&node.node.id) .map_err(|e| anyhow::anyhow!("Failed to parse node address {}: {}", node.node.id, e))?; + let (response_tx, response_rx) = tokio::sync::oneshot::channel(); + let hardware_challenge = HardwareChallengeRequest { + worker_wallet_address: node_address, + worker_p2p_id: p2p_id, + worker_addresses: p2p_addresses, + challenge: challenge_with_timestamp, + response_tx, + }; // Send challenge via P2P - match self - .p2p_client - .send_hardware_challenge( - node_address, - p2p_id, - p2p_addresses, - challenge_with_timestamp, - ) + self.challenge_tx + .send(hardware_challenge) + .await + .context("failed to send hardware challenge request to p2p service")?; + + let resp = response_rx .await - { - Ok(response) => { - if challenge_expected.result == response.result { - info!("Challenge for node {} successful", node.id); - Ok(0) - } else { - error!( - "Challenge failed for node {}: expected {:?}, got {:?}", - node.id, challenge_expected.result, response.result - ); - Err(anyhow::anyhow!("Node failed challenge")) - } - 
} - Err(e) => { - error!("Failed to send challenge to node {}: {}", node.id, e); - Err(anyhow::anyhow!("Failed to send challenge: {}", e)) - } + .context("failed to receive response from node")?; + + if challenge_expected.result == resp.result { + info!("Challenge for node {} successful", node.id); + } else { + error!( + "Challenge failed for node {}: expected {:?}, got {:?}", + node.id, challenge_expected.result, resp.result + ); + bail!("Node failed challenge"); } + + Ok(()) } fn random_challenge( @@ -83,7 +82,9 @@ impl<'a> HardwareChallenge<'a> { cols_a: usize, rows_b: usize, cols_b: usize, - ) -> ChallengeRequest { + ) -> p2p::ChallengeRequest { + use p2p::FixedF64; + let mut rng = rng(); let data_a_vec: Vec = (0..(rows_a * cols_a)) @@ -98,7 +99,7 @@ impl<'a> HardwareChallenge<'a> { let data_a: Vec = data_a_vec.iter().map(|x| FixedF64(*x)).collect(); let data_b: Vec = data_b_vec.iter().map(|x| FixedF64(*x)).collect(); - ChallengeRequest { + p2p::ChallengeRequest { rows_a, cols_a, data_a, From 56d6b1dab255712b95256879ff389b04eeca2a84 Mon Sep 17 00:00:00 2001 From: elizabeth Date: Wed, 9 Jul 2025 17:17:43 -0400 Subject: [PATCH 20/38] clean up deps --- Cargo.lock | 1 - crates/validator/Cargo.toml | 14 +++++++------- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a4a2af90..a93d4a0e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9465,7 +9465,6 @@ dependencies = [ "tempfile", "tokio", "tokio-util", - "toml", "url", ] diff --git a/crates/validator/Cargo.toml b/crates/validator/Cargo.toml index 043c1558..76969bb0 100644 --- a/crates/validator/Cargo.toml +++ b/crates/validator/Cargo.toml @@ -7,6 +7,7 @@ edition.workspace = true workspace = true [dependencies] +shared = { workspace = true } p2p = { workspace = true} actix-web = { workspace = true } @@ -19,23 +20,22 @@ env_logger = { workspace = true } futures = { workspace = true } hex = { workspace = true } rand_v8 = { workspace = true } -lazy_static = "1.5.0" log = { workspace 
= true } nalgebra = { workspace = true } -prometheus = "0.14.0" -rand = "0.9.0" redis = { workspace = true, features = ["tokio-comp"] } -redis-test = { workspace = true } -regex = "1.11.1" reqwest = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } -shared = { workspace = true } tokio = { workspace = true } tokio-util = { workspace = true } -toml = { workspace = true } url = { workspace = true } +lazy_static = "1.5.0" +prometheus = "0.14.0" +rand = "0.9.0" +regex = "1.11.1" + [dev-dependencies] mockito = { workspace = true } +redis-test = { workspace = true } tempfile = "=3.14.0" From c6183d66264ce05676537901d8fdaaf78b5a6323 Mon Sep 17 00:00:00 2001 From: elizabeth Date: Wed, 9 Jul 2025 17:23:14 -0400 Subject: [PATCH 21/38] add authorized peer to map --- crates/worker/src/p2p/mod.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/crates/worker/src/p2p/mod.rs b/crates/worker/src/p2p/mod.rs index c2af2bbd..cc905d83 100644 --- a/crates/worker/src/p2p/mod.rs +++ b/crates/worker/src/p2p/mod.rs @@ -208,7 +208,7 @@ async fn handle_incoming_request( .context("failed to handle ValidatorAuthenticationInitiationRequest")? 
} p2p::ValidatorAuthenticationRequest::Solution(req) => { - match handle_validator_authentication_initiation_solution(from, req, &context) + match handle_validator_authentication_solution_request(from, req, &context) .await { Ok(()) => p2p::ValidatorAuthenticationSolutionResponse::Granted.into(), @@ -288,7 +288,7 @@ async fn handle_validator_authentication_initiation_request( .into()) } -async fn handle_validator_authentication_initiation_solution( +async fn handle_validator_authentication_solution_request( from: PeerId, req: p2p::ValidatorAuthenticationSolutionRequest, context: &Context, @@ -317,6 +317,8 @@ async fn handle_validator_authentication_initiation_solution( anyhow::bail!("recovered address {recovered_address} is not in the list of authorized validator addresses"); } + let mut authorized_peers = context.authorized_peers.write().await; + authorized_peers.insert(from); Ok(()) } From 94e9e4d462aa021f29a498b6b0c88e927857cab3 Mon Sep 17 00:00:00 2001 From: elizabeth Date: Thu, 10 Jul 2025 12:08:18 -0400 Subject: [PATCH 22/38] implement dialing peers --- Cargo.toml | 2 ++ crates/p2p/src/lib.rs | 40 +++++++++++++++++++++++++++++----------- 2 files changed, 31 insertions(+), 11 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 4279f156..d4ca7ab8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,6 +12,8 @@ resolver = "2" [workspace.dependencies] shared = { path = "crates/shared" } +p2p = { path = "crates/p2p" } + actix-web = "4.9.0" clap = { version = "4.5.27", features = ["derive"] } serde = { version = "1.0.219", features = ["derive"] } diff --git a/crates/p2p/src/lib.rs b/crates/p2p/src/lib.rs index 6e2efca3..4a2b176e 100644 --- a/crates/p2p/src/lib.rs +++ b/crates/p2p/src/lib.rs @@ -1,6 +1,5 @@ use anyhow::Context; use anyhow::Result; -use libp2p::futures::stream::FuturesUnordered; use libp2p::noise; use libp2p::swarm::SwarmEvent; use libp2p::tcp; @@ -23,6 +22,9 @@ pub type Libp2pIncomingMessage = libp2p::request_response::Message; pub type PeerId = 
libp2p::PeerId; pub type Multiaddr = libp2p::Multiaddr; +pub type Keypair = libp2p::identity::Keypair; +pub type DialSender = + tokio::sync::mpsc::Sender<(Vec, tokio::sync::oneshot::Sender>)>; pub const PRIME_STREAM_PROTOCOL: libp2p::StreamProtocol = libp2p::StreamProtocol::new("/prime/1.0.0"); @@ -36,6 +38,9 @@ pub struct Node { bootnodes: Vec, cancellation_token: tokio_util::sync::CancellationToken, + dial_rx: + tokio::sync::mpsc::Receiver<(Vec, tokio::sync::oneshot::Sender>)>, + // channel for sending incoming messages to the consumer of this library incoming_message_tx: tokio::sync::mpsc::Sender, @@ -73,6 +78,7 @@ impl Node { mut swarm, bootnodes, cancellation_token, + mut dial_rx, incoming_message_tx, mut outgoing_message_rx, } = self; @@ -83,17 +89,12 @@ impl Node { .context("swarm failed to listen on multiaddr")?; } - let futures = FuturesUnordered::new(); for bootnode in bootnodes { - futures.push(swarm.dial(bootnode)) - } - let results: Vec<_> = futures.into_iter().collect(); - for result in results { - match result { + match swarm.dial(bootnode.clone()) { Ok(_) => {} Err(e) => { - // TODO: log this error - println!("failed to dial bootnode: {e:?}"); + // log error + println!("failed to dial bootnode {bootnode}: {e:?}"); } } } @@ -104,6 +105,19 @@ impl Node { println!("cancellation token triggered, shutting down node"); break Ok(()); } + Some((addrs, res_tx)) = dial_rx.recv() => { + let mut res = Ok(()); + for addr in addrs { + match swarm.dial(addr.clone()) { + Ok(_) => {} + Err(e) => { + res = Err(anyhow::anyhow!("failed to dial {addr}: {e:?}")); + break; + } + } + } + let _ = res_tx.send(res); + } Some(message) = outgoing_message_rx.recv() => { match message { OutgoingMessage::Request((peer, request)) => { @@ -254,6 +268,7 @@ impl NodeBuilder { self, ) -> Result<( Node, + DialSender, tokio::sync::mpsc::Receiver, tokio::sync::mpsc::Sender, )> { @@ -295,6 +310,7 @@ impl NodeBuilder { listen_addrs.push(listen_addr); } + let (dial_tx, dial_rx) = 
tokio::sync::mpsc::channel(100); let (incoming_message_tx, incoming_message_rx) = tokio::sync::mpsc::channel(100); let (outgoing_message_tx, outgoing_message_rx) = tokio::sync::mpsc::channel(100); @@ -304,10 +320,12 @@ impl NodeBuilder { swarm, listen_addrs, bootnodes, + dial_rx, incoming_message_tx, outgoing_message_rx, cancellation_token: cancellation_token.unwrap_or_default(), }, + dial_tx, incoming_message_rx, outgoing_message_tx, )) @@ -334,11 +352,11 @@ mod test { #[tokio::test] async fn two_nodes_can_connect_and_do_request_response() { - let (node1, mut incoming_message_rx1, outgoing_message_tx1) = + let (node1, _, mut incoming_message_rx1, outgoing_message_tx1) = NodeBuilder::new().with_get_task_logs().try_build().unwrap(); let node1_peer_id = node1.peer_id(); - let (node2, mut incoming_message_rx2, outgoing_message_tx2) = NodeBuilder::new() + let (node2, _, mut incoming_message_rx2, outgoing_message_tx2) = NodeBuilder::new() .with_get_task_logs() .with_bootnodes(node1.multiaddrs()) .try_build() From a8af70634b017787edbefc5d0577a93dcfd0d539 Mon Sep 17 00:00:00 2001 From: elizabeth Date: Thu, 10 Jul 2025 16:50:45 -0400 Subject: [PATCH 23/38] use tracing --- Cargo.lock | 1 + Cargo.toml | 1 + crates/p2p/Cargo.toml | 1 + crates/p2p/src/behaviour.rs | 9 +++++---- crates/p2p/src/lib.rs | 18 +++++++----------- crates/worker/Cargo.toml | 2 +- 6 files changed, 16 insertions(+), 16 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ae652ad4..c16f0570 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6267,6 +6267,7 @@ dependencies = [ "serde", "tokio", "tokio-util", + "tracing", "void", ] diff --git a/Cargo.toml b/Cargo.toml index d4ca7ab8..1bc9e2ac 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -45,6 +45,7 @@ rand_core_v6 = { package = "rand_core", version = "0.6.4", features = ["std"] } ipld-core = "0.4" rust-ipfs = "0.14" cid = "0.11" +tracing = "0.1.41" [workspace.package] version = "0.3.11" diff --git a/crates/p2p/Cargo.toml b/crates/p2p/Cargo.toml index 
ba52d570..bb670107 100644 --- a/crates/p2p/Cargo.toml +++ b/crates/p2p/Cargo.toml @@ -12,6 +12,7 @@ nalgebra = {workspace = true} serde = {workspace = true} tokio = {workspace = true, features = ["sync"]} tokio-util = { workspace = true, features = ["rt"] } +tracing = { workspace = true } [lints] workspace = true diff --git a/crates/p2p/src/behaviour.rs b/crates/p2p/src/behaviour.rs index e2737d57..b114b61e 100644 --- a/crates/p2p/src/behaviour.rs +++ b/crates/p2p/src/behaviour.rs @@ -12,6 +12,7 @@ use libp2p::ping; use libp2p::request_response; use libp2p::swarm::NetworkBehaviour; use std::time::Duration; +use tracing::debug; use crate::message::IncomingMessage; use crate::message::{Request, Response}; @@ -152,19 +153,19 @@ impl BehaviourEvent { BehaviourEvent::Ping(_event) => {} BehaviourEvent::RequestResponse(event) => match event { request_response::Event::Message { peer, message } => { - println!("received message from peer {peer:?}: {message:?}"); + debug!("received message from peer {peer:?}: {message:?}"); // if this errors, user dropped their incoming message channel let _ = message_tx.send(IncomingMessage { peer, message }).await; } request_response::Event::ResponseSent { peer, request_id } => { - println!("response sent to peer {peer:?} for request ID {request_id:?}"); + debug!("response sent to peer {peer:?} for request ID {request_id:?}"); } request_response::Event::InboundFailure { peer, request_id, error, } => { - println!( + debug!( "inbound failure from peer {peer:?} for request ID {request_id:?}: {error}" ); } @@ -173,7 +174,7 @@ impl BehaviourEvent { request_id, error, } => { - println!( + debug!( "outbound failure to peer {peer:?} for request ID {request_id:?}: {error}" ); } diff --git a/crates/p2p/src/lib.rs b/crates/p2p/src/lib.rs index 4a2b176e..0a5637a9 100644 --- a/crates/p2p/src/lib.rs +++ b/crates/p2p/src/lib.rs @@ -8,6 +8,7 @@ use libp2p::Swarm; use libp2p::SwarmBuilder; use libp2p::{identity, Transport}; use std::time::Duration; +use 
tracing::debug; mod behaviour; mod message; @@ -93,8 +94,7 @@ impl Node { match swarm.dial(bootnode.clone()) { Ok(_) => {} Err(e) => { - // log error - println!("failed to dial bootnode {bootnode}: {e:?}"); + debug!("failed to dial bootnode {bootnode}: {e:?}"); } } } @@ -102,7 +102,7 @@ impl Node { loop { tokio::select! { _ = cancellation_token.cancelled() => { - println!("cancellation token triggered, shutting down node"); + debug!("cancellation token triggered, shutting down node"); break Ok(()); } Some((addrs, res_tx)) = dial_rx.recv() => { @@ -124,10 +124,8 @@ impl Node { swarm.behaviour_mut().request_response().send_request(&peer, request); } OutgoingMessage::Response((channel, response)) => { - println!("sending response on channel"); if let Err(e) = swarm.behaviour_mut().request_response().send_response(channel, response) { - // log error - println!("failed to send response: {e:?}"); + debug!("failed to send response: {e:?}"); } } } @@ -138,10 +136,10 @@ impl Node { listener_id: _, address, } => { - println!("new listen address: {address}"); + debug!("new listen address: {address}"); } SwarmEvent::ExternalAddrConfirmed { address } => { - println!("external address confirmed: {address}"); + debug!("external address confirmed: {address}"); } SwarmEvent::ConnectionClosed { peer_id, @@ -150,7 +148,7 @@ impl Node { connection_id: _, num_established: _, } => { - println!("connection closed with peer {peer_id}: {cause:?}"); + debug!("connection closed with peer {peer_id}: {cause:?}"); } SwarmEvent::Behaviour(event) => event.handle(incoming_message_tx.clone()).await, _ => continue, @@ -386,8 +384,6 @@ mod test { panic!("expected a GetTaskLogs request message"); }; - println!("received request from node1"); - // send response from node2->node1 let response = message::Response::GetTaskLogs(message::GetTaskLogsResponse::Ok("logs".to_string())); diff --git a/crates/worker/Cargo.toml b/crates/worker/Cargo.toml index 18596ba5..0f08e404 100644 --- 
a/crates/worker/Cargo.toml +++ b/crates/worker/Cargo.toml @@ -50,7 +50,7 @@ unicode-width = "0.2.0" rand = "0.9.0" tempfile = "3.14.0" tracing-loki = "0.2.6" -tracing = "0.1.41" +tracing = { workspace = true } tracing-subscriber = { version = "0.3.19", features = ["env-filter"] } tracing-log = "0.2.0" time = "0.3.41" From 15dc2c4a7d5ff5463257f303f1e75e6ba57b5561 Mon Sep 17 00:00:00 2001 From: elizabeth Date: Thu, 10 Jul 2025 18:33:27 -0400 Subject: [PATCH 24/38] move shared authentication service to shared --- Cargo.lock | 4 + crates/p2p/src/lib.rs | 12 +- crates/p2p/src/message/mod.rs | 22 +- crates/p2p/src/protocol.rs | 46 +- crates/shared/Cargo.toml | 4 + crates/shared/src/p2p/mod.rs | 3 + crates/shared/src/p2p/service.rs | 453 ++++++++++++++++++ crates/validator/src/p2p/mod.rs | 385 ++------------- .../src/validators/hardware_challenge.rs | 1 + 9 files changed, 582 insertions(+), 348 deletions(-) create mode 100644 crates/shared/src/p2p/service.rs diff --git a/Cargo.lock b/Cargo.lock index a93d4a0e..c54a0ad1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6223,6 +6223,7 @@ dependencies = [ "iroh", "log", "mockito", + "p2p", "prometheus 0.14.0", "rand 0.8.5", "rand 0.9.1", @@ -8232,12 +8233,14 @@ dependencies = [ "base64 0.22.1", "chrono", "dashmap", + "futures", "futures-util", "google-cloud-storage", "hex", "iroh", "log", "nalgebra", + "p2p", "rand 0.8.5", "rand 0.9.1", "redis", @@ -8246,6 +8249,7 @@ dependencies = [ "serde_json", "subtle", "tokio", + "tokio-util", "url", "utoipa", "uuid", diff --git a/crates/p2p/src/lib.rs b/crates/p2p/src/lib.rs index 4a2b176e..4f032bbf 100644 --- a/crates/p2p/src/lib.rs +++ b/crates/p2p/src/lib.rs @@ -14,9 +14,9 @@ mod message; mod protocol; use behaviour::Behaviour; -use protocol::Protocols; pub use message::*; +pub use protocol::*; pub type Libp2pIncomingMessage = libp2p::request_response::Message; pub type ResponseChannel = libp2p::request_response::ResponseChannel; @@ -120,7 +120,8 @@ impl Node { } Some(message) = 
outgoing_message_rx.recv() => { match message { - OutgoingMessage::Request((peer, request)) => { + OutgoingMessage::Request((peer, _addrs, request)) => { + // TODO: if we're not connected to the peer, we should dial it swarm.behaviour_mut().request_response().send_request(&peer, request); } OutgoingMessage::Response((channel, response)) => { @@ -240,6 +241,11 @@ impl NodeBuilder { self } + pub fn with_protocols(mut self, protocols: Protocols) -> Self { + self.protocols.join(protocols); + self + } + pub fn with_bootnode(mut self, bootnode: Multiaddr) -> Self { self.bootnodes.push(bootnode); self @@ -372,7 +378,7 @@ mod test { // send request from node1->node2 let request = message::Request::GetTaskLogs; outgoing_message_tx1 - .send(request.into_outgoing_message(node2_peer_id)) + .send(request.into_outgoing_message(node2_peer_id, vec![])) .await .unwrap(); let message = incoming_message_rx2.recv().await.unwrap(); diff --git a/crates/p2p/src/message/mod.rs b/crates/p2p/src/message/mod.rs index adff99ac..dc2403e3 100644 --- a/crates/p2p/src/message/mod.rs +++ b/crates/p2p/src/message/mod.rs @@ -1,3 +1,4 @@ +use crate::Protocol; use libp2p::PeerId; use serde::{Deserialize, Serialize}; use std::time::SystemTime; @@ -15,7 +16,7 @@ pub struct IncomingMessage { #[allow(clippy::large_enum_variant)] #[derive(Debug)] pub enum OutgoingMessage { - Request((PeerId, Request)), + Request((PeerId, Vec, Request)), Response( ( libp2p::request_response::ResponseChannel, @@ -35,8 +36,23 @@ pub enum Request { } impl Request { - pub fn into_outgoing_message(self, peer: PeerId) -> OutgoingMessage { - OutgoingMessage::Request((peer, self)) + pub fn into_outgoing_message( + self, + peer: PeerId, + multiaddrs: Vec, + ) -> OutgoingMessage { + OutgoingMessage::Request((peer, multiaddrs, self)) + } + + pub fn protocol(&self) -> Protocol { + match self { + Request::ValidatorAuthentication(_) => Protocol::ValidatorAuthentication, + Request::HardwareChallenge(_) => Protocol::HardwareChallenge, + 
Request::Invite(_) => Protocol::Invite, + Request::GetTaskLogs => Protocol::GetTaskLogs, + Request::Restart => Protocol::Restart, + Request::General(_) => Protocol::General, + } } } diff --git a/crates/p2p/src/protocol.rs b/crates/p2p/src/protocol.rs index df423ef8..ae839cec 100644 --- a/crates/p2p/src/protocol.rs +++ b/crates/p2p/src/protocol.rs @@ -2,7 +2,7 @@ use libp2p::StreamProtocol; use std::{collections::HashSet, hash::Hash}; #[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub(crate) enum Protocol { +pub enum Protocol { // validator -> worker ValidatorAuthentication, // validator -> worker @@ -33,42 +33,70 @@ impl Protocol { } #[derive(Debug, Clone)] -pub(crate) struct Protocols(HashSet); +pub struct Protocols(HashSet); impl Protocols { - pub(crate) fn new() -> Self { + pub fn new() -> Self { Self(HashSet::new()) } - pub(crate) fn with_validator_authentication(mut self) -> Self { + pub fn has_validator_authentication(&self) -> bool { + self.0.contains(&Protocol::ValidatorAuthentication) + } + + pub fn has_hardware_challenge(&self) -> bool { + self.0.contains(&Protocol::HardwareChallenge) + } + + pub fn has_invite(&self) -> bool { + self.0.contains(&Protocol::Invite) + } + + pub fn has_get_task_logs(&self) -> bool { + self.0.contains(&Protocol::GetTaskLogs) + } + + pub fn has_restart(&self) -> bool { + self.0.contains(&Protocol::Restart) + } + + pub fn has_general(&self) -> bool { + self.0.contains(&Protocol::General) + } + + pub fn with_validator_authentication(mut self) -> Self { self.0.insert(Protocol::ValidatorAuthentication); self } - pub(crate) fn with_hardware_challenge(mut self) -> Self { + pub fn with_hardware_challenge(mut self) -> Self { self.0.insert(Protocol::HardwareChallenge); self } - pub(crate) fn with_invite(mut self) -> Self { + pub fn with_invite(mut self) -> Self { self.0.insert(Protocol::Invite); self } - pub(crate) fn with_get_task_logs(mut self) -> Self { + pub fn with_get_task_logs(mut self) -> Self { 
self.0.insert(Protocol::GetTaskLogs); self } - pub(crate) fn with_restart(mut self) -> Self { + pub fn with_restart(mut self) -> Self { self.0.insert(Protocol::Restart); self } - pub(crate) fn with_general(mut self) -> Self { + pub fn with_general(mut self) -> Self { self.0.insert(Protocol::General); self } + + pub(crate) fn join(&mut self, other: Protocols) { + self.0.extend(other.0); + } } impl IntoIterator for Protocols { diff --git a/crates/shared/Cargo.toml b/crates/shared/Cargo.toml index 9afdafff..4d3a8760 100644 --- a/crates/shared/Cargo.toml +++ b/crates/shared/Cargo.toml @@ -15,6 +15,8 @@ default = [] testnet = [] [dependencies] +p2p = { workspace = true} + tokio = { workspace = true } alloy = { workspace = true } alloy-provider = { workspace = true } @@ -40,3 +42,5 @@ iroh = { workspace = true } rand_v8 = { workspace = true } subtle = "2.6.1" utoipa = { version = "5.3.0", features = ["actix_extras", "chrono", "uuid"] } +futures = { workspace = true } +tokio-util = { workspace = true } diff --git a/crates/shared/src/p2p/mod.rs b/crates/shared/src/p2p/mod.rs index f505f3b1..cac69a8a 100644 --- a/crates/shared/src/p2p/mod.rs +++ b/crates/shared/src/p2p/mod.rs @@ -1,6 +1,9 @@ pub mod client; pub mod messages; pub mod protocol; +mod service; pub use client::P2PClient; pub use protocol::*; + +pub use service::*; diff --git a/crates/shared/src/p2p/service.rs b/crates/shared/src/p2p/service.rs new file mode 100644 index 00000000..f5a7bbe3 --- /dev/null +++ b/crates/shared/src/p2p/service.rs @@ -0,0 +1,453 @@ +use crate::web3::wallet::Wallet; +use anyhow::{bail, Context as _, Result}; +use futures::stream::FuturesUnordered; +use p2p::{ + IncomingMessage, Libp2pIncomingMessage, Node, NodeBuilder, OutgoingMessage, PeerId, Protocol, + Protocols, Response, ValidatorAuthenticationInitiationRequest, ValidatorAuthenticationResponse, + ValidatorAuthenticationSolutionRequest, +}; +use std::collections::HashMap; +use std::collections::HashSet; +use std::sync::Arc; +use 
tokio::sync::mpsc::{Receiver, Sender}; +use tokio::sync::RwLock; +use tokio_util::sync::CancellationToken; + +pub struct OutgoingRequest { + pub peer_wallet_address: alloy::primitives::Address, + pub request: p2p::Request, + pub peer_id: String, + pub multiaddrs: Vec, + pub response_tx: tokio::sync::oneshot::Sender, +} + +/// A p2p service implementation that is used by the validator and the orchestrator. +/// It handles the authentication protocol used before sending +/// requests to the worker. +pub struct Service { + _node: Node, + dial_tx: p2p::DialSender, + incoming_messages_rx: Receiver, + outgoing_messages_rx: Receiver, + cancellation_token: CancellationToken, + context: Context, +} + +impl Service { + pub fn new( + keypair: p2p::Keypair, + port: u16, + cancellation_token: CancellationToken, + wallet: Wallet, + protocols: Protocols, + ) -> Result<(Self, Sender)> { + let (node, dial_tx, incoming_messages_rx, outgoing_messages) = + build_p2p_node(keypair, port, cancellation_token.clone(), protocols.clone()) + .context("failed to build p2p node")?; + let (outgoing_messages_tx, outgoing_messages_rx) = tokio::sync::mpsc::channel(100); + + Ok(( + Self { + _node: node, + dial_tx, + incoming_messages_rx, + outgoing_messages_rx, + cancellation_token, + context: Context::new(outgoing_messages, wallet, protocols), + }, + outgoing_messages_tx, + )) + } + + pub async fn run(self) { + use futures::StreamExt as _; + + let Self { + _node, + dial_tx, + mut incoming_messages_rx, + mut outgoing_messages_rx, + cancellation_token, + context, + } = self; + + let mut message_handlers = FuturesUnordered::new(); + + loop { + tokio::select! 
{ + _ = cancellation_token.cancelled() => { + break; + } + Some(message) = outgoing_messages_rx.recv() => { + if let Err(e) = handle_outgoing_message(message, dial_tx.clone(), context.clone()) + .await { + log::error!("failed to handle outgoing message: {e}"); + } + } + Some(message) = incoming_messages_rx.recv() => { + let context = context.clone(); + let handle = tokio::task::spawn( + handle_incoming_message(message, context) + ); + message_handlers.push(handle); + } + Some(res) = message_handlers.next() => { + if let Err(e) = res { + log::error!("failed to handle incoming message: {e}"); + } + } + } + } + } +} + +fn build_p2p_node( + keypair: p2p::Keypair, + port: u16, + cancellation_token: CancellationToken, + protocols: Protocols, +) -> Result<( + Node, + p2p::DialSender, + Receiver, + Sender, +)> { + NodeBuilder::new() + .with_keypair(keypair) + .with_port(port) + .with_validator_authentication() + .with_protocols(protocols) + .with_cancellation_token(cancellation_token) + .try_build() +} + +#[derive(Clone)] +struct Context { + // outbound message channel; receiver is held by libp2p node + outgoing_messages: Sender, + + // ongoing authentication requests + ongoing_auth_requests: Arc>>, + is_authenticated_with_peer: Arc>>, + + // this assumes that there is only one outbound request per protocol per peer at a time, + // is this a correct assumption? 
+ // response channel is for sending the response back to the caller who initiated the request + ongoing_outbound_requests: + Arc>>>, + + wallet: Wallet, + protocols: Protocols, +} + +#[derive(Debug)] +struct OngoingAuthChallenge { + peer_wallet_address: alloy::primitives::Address, + auth_challenge_request_message: String, + outgoing_message: p2p::Request, + response_tx: tokio::sync::oneshot::Sender, +} + +impl Context { + fn new( + outgoing_messages: Sender, + wallet: Wallet, + protocols: Protocols, + ) -> Self { + Self { + outgoing_messages, + ongoing_auth_requests: Arc::new(RwLock::new(HashMap::new())), + is_authenticated_with_peer: Arc::new(RwLock::new(HashSet::new())), + ongoing_outbound_requests: Arc::new(RwLock::new(HashMap::new())), + wallet, + protocols, + } + } +} + +async fn handle_outgoing_message( + message: OutgoingRequest, + dial_tx: p2p::DialSender, + context: Context, +) -> Result<()> { + use rand_v8::rngs::OsRng; + use rand_v8::Rng as _; + use std::str::FromStr as _; + + let OutgoingRequest { + peer_wallet_address, + request, + peer_id, + multiaddrs, + response_tx, + } = message; + + let peer_id = PeerId::from_str(&peer_id).context("failed to parse peer id")?; + + // check if we're authenticated already + let is_authenticated_with_peer = context.is_authenticated_with_peer.read().await; + if is_authenticated_with_peer.contains(&peer_id) { + log::debug!( + "already authenticated with peer {peer_id}, skipping validation authentication" + ); + // multiaddresses are already known, as we've connected to them previously + context + .outgoing_messages + .send(request.into_outgoing_message(peer_id, vec![])) + .await + .context("failed to send outgoing message")?; + return Ok(()); + } + + log::debug!("sending validation authentication request to {peer_id}"); + + // first, dial the worker + // ensure there's no ongoing challenge + // use write-lock to make this atomic until we finish sending the auth request and writing to the map + let mut 
ongoing_auth_requests = context.ongoing_auth_requests.write().await; + if ongoing_auth_requests.contains_key(&peer_id) { + bail!("ongoing auth request for {} already exists", peer_id); + } + + let multiaddrs = multiaddrs + .iter() + .filter_map(|addr| p2p::Multiaddr::from_str(addr).ok()?.with_p2p(peer_id).ok()) + .collect::>(); + if multiaddrs.is_empty() { + bail!("no valid multiaddrs for peer id {peer_id}"); + } + + // TODO: we can improve this by checking if we're already connected to the peer before dialing + let (res_tx, res_rx) = tokio::sync::oneshot::channel(); + dial_tx + .send((multiaddrs.clone(), res_tx)) + .await + .context("failed to send dial request")?; + res_rx + .await + .context("failed to receive dial response")? + .context("failed to dial worker")?; + + // create the authentication challenge request message + let challenge_bytes: [u8; 32] = OsRng.gen(); + let auth_challenge_message: String = hex::encode(challenge_bytes); + + let req: p2p::Request = ValidatorAuthenticationInitiationRequest { + message: auth_challenge_message.clone(), + } + .into(); + let outgoing_message = req.into_outgoing_message(peer_id, multiaddrs); + log::debug!("sending ValidatorAuthenticationInitiationRequest to {peer_id}"); + context + .outgoing_messages + .send(outgoing_message) + .await + .context("failed to send outgoing message")?; + + // store the ongoing auth challenge + let ongoing_challenge = OngoingAuthChallenge { + peer_wallet_address, + auth_challenge_request_message: auth_challenge_message.clone(), + outgoing_message: request, + response_tx, + }; + + ongoing_auth_requests.insert(peer_id, ongoing_challenge); + Ok(()) +} + +async fn handle_incoming_message(message: IncomingMessage, context: Context) -> Result<()> { + match message.message { + Libp2pIncomingMessage::Request { + request_id: _, + request, + channel: _, + } => { + log::error!( + "node should not receive incoming requests: {request:?} from {}", + message.peer + ); + } + Libp2pIncomingMessage::Response 
{ + request_id: _, + response, + } => { + log::debug!("received incoming response {response:?}"); + handle_incoming_response(message.peer, response, context) + .await + .context("failed to handle incoming response")?; + } + } + Ok(()) +} + +async fn handle_incoming_response( + from: PeerId, + response: p2p::Response, + context: Context, +) -> Result<()> { + match response { + p2p::Response::ValidatorAuthentication(resp) => { + log::debug!("received ValidatorAuthenticationSolutionResponse from {from}: {resp:?}"); + handle_validation_authentication_response(from, resp, context) + .await + .context("failed to handle validator authentication response")?; + } + p2p::Response::HardwareChallenge(ref resp) => { + if !context.protocols.has_hardware_challenge() { + bail!("received HardwareChallengeResponse from {from}, but hardware challenge protocol is not enabled"); + } + + log::debug!("received HardwareChallengeResponse from {from}: {resp:?}"); + let mut ongoing_outbound_requests = context.ongoing_outbound_requests.write().await; + let Some(response_tx) = + ongoing_outbound_requests.remove(&(from, Protocol::HardwareChallenge)) + else { + bail!( + "no ongoing hardware challenge for peer {from}, cannot handle HardwareChallengeResponse" + ); + }; + let _ = response_tx.send(response); + } + p2p::Response::Invite(ref resp) => { + if !context.protocols.has_invite() { + bail!("received InviteResponse from {from}, but invite protocol is not enabled"); + } + + log::debug!("received InviteResponse from {from}: {resp:?}"); + let mut ongoing_outbound_requests = context.ongoing_outbound_requests.write().await; + let Some(response_tx) = ongoing_outbound_requests.remove(&(from, Protocol::Invite)) + else { + bail!("no ongoing invite for peer {from}, cannot handle InviteResponse"); + }; + let _ = response_tx.send(response); + } + p2p::Response::GetTaskLogs(ref resp) => { + if !context.protocols.has_get_task_logs() { + bail!("received GetTaskLogsResponse from {from}, but get task logs 
protocol is not enabled"); + } + + log::debug!("received GetTaskLogsResponse from {from}: {resp:?}"); + let mut ongoing_outbound_requests = context.ongoing_outbound_requests.write().await; + let Some(response_tx) = + ongoing_outbound_requests.remove(&(from, Protocol::GetTaskLogs)) + else { + bail!("no ongoing GetTaskLogs for peer {from}, cannot handle GetTaskLogsResponse"); + }; + let _ = response_tx.send(response); + } + p2p::Response::Restart(ref resp) => { + if !context.protocols.has_restart() { + bail!("received RestartResponse from {from}, but restart protocol is not enabled"); + } + + log::debug!("received RestartResponse from {from}: {resp:?}"); + let mut ongoing_outbound_requests = context.ongoing_outbound_requests.write().await; + let Some(response_tx) = ongoing_outbound_requests.remove(&(from, Protocol::Restart)) + else { + bail!("no ongoing Restart for peer {from}, cannot handle RestartResponse"); + }; + let _ = response_tx.send(response); + } + p2p::Response::General(ref resp) => { + if !context.protocols.has_general() { + bail!("received GeneralResponse from {from}, but general protocol is not enabled"); + } + + log::debug!("received GeneralResponse from {from}: {resp:?}"); + let mut ongoing_outbound_requests = context.ongoing_outbound_requests.write().await; + let Some(response_tx) = ongoing_outbound_requests.remove(&(from, Protocol::General)) + else { + bail!("no ongoing General for peer {from}, cannot handle GeneralResponse"); + }; + let _ = response_tx.send(response); + } + } + + Ok(()) +} + +async fn handle_validation_authentication_response( + from: PeerId, + response: p2p::ValidatorAuthenticationResponse, + context: Context, +) -> Result<()> { + use crate::security::request_signer::sign_message; + use std::str::FromStr as _; + + match response { + ValidatorAuthenticationResponse::Initiation(req) => { + let ongoing_auth_requests = context.ongoing_auth_requests.read().await; + let Some(ongoing_challenge) = ongoing_auth_requests.get(&from) else { + 
bail!( + "no ongoing hardware challenge for peer {from}, cannot handle ValidatorAuthenticationInitiationResponse" + ); + }; + + let Ok(parsed_signature) = alloy::primitives::Signature::from_str(&req.signature) + else { + bail!("failed to parse signature from response"); + }; + + // recover address from the challenge message that the peer signed + let Ok(recovered_address) = parsed_signature + .recover_address_from_msg(&ongoing_challenge.auth_challenge_request_message) + else { + bail!("Failed to recover address from response signature") + }; + + // verify the recovered address matches the expected worker wallet address + if recovered_address != ongoing_challenge.peer_wallet_address { + bail!( + "peer address verification failed: expected {}, got {recovered_address}", + ongoing_challenge.peer_wallet_address, + ) + } + + log::debug!("auth challenge initiation response received from node: {from}"); + let signature = sign_message(&req.message, &context.wallet).await.unwrap(); + + let req: p2p::Request = ValidatorAuthenticationSolutionRequest { signature }.into(); + let req = req.into_outgoing_message(from, vec![]); + context + .outgoing_messages + .send(req) + .await + .context("failed to send outgoing message")?; + } + ValidatorAuthenticationResponse::Solution(req) => { + let mut ongoing_auth_requests = context.ongoing_auth_requests.write().await; + let Some(ongoing_challenge) = ongoing_auth_requests.remove(&from) else { + bail!( + "no ongoing hardware challenge for peer {from}, cannot handle ValidatorAuthenticationSolutionResponse" + ); + }; + + match req { + p2p::ValidatorAuthenticationSolutionResponse::Granted => {} + p2p::ValidatorAuthenticationSolutionResponse::Rejected => { + log::debug!("auth challenge rejected by node: {from}"); + return Ok(()); + } + } + + // auth was granted, finally send the hardware challenge + let mut is_authenticated_with_peer = context.is_authenticated_with_peer.write().await; + is_authenticated_with_peer.insert(from); + + let protocol 
= ongoing_challenge.outgoing_message.protocol(); + let req = ongoing_challenge + .outgoing_message + .into_outgoing_message(from, vec![]); + context + .outgoing_messages + .send(req) + .await + .context("failed to send outgoing message")?; + + let mut ongoing_outbound_requests = context.ongoing_outbound_requests.write().await; + ongoing_outbound_requests.insert((from, protocol), ongoing_challenge.response_tx); + } + } + Ok(()) +} diff --git a/crates/validator/src/p2p/mod.rs b/crates/validator/src/p2p/mod.rs index e756d33f..fcce43ec 100644 --- a/crates/validator/src/p2p/mod.rs +++ b/crates/validator/src/p2p/mod.rs @@ -1,87 +1,91 @@ use anyhow::{bail, Context as _, Result}; use futures::stream::FuturesUnordered; -use p2p::{ - IncomingMessage, Libp2pIncomingMessage, Node, NodeBuilder, OutgoingMessage, PeerId, - ValidatorAuthenticationInitiationRequest, ValidatorAuthenticationResponse, - ValidatorAuthenticationSolutionRequest, -}; +use p2p::{Keypair, Protocols}; +use shared::p2p::OutgoingRequest; +use shared::p2p::Service as P2PService; use shared::web3::wallet::Wallet; -use std::collections::HashMap; -use std::sync::Arc; use tokio::sync::mpsc::{Receiver, Sender}; -use tokio::sync::RwLock; use tokio_util::sync::CancellationToken; pub struct Service { - _node: Node, - dial_tx: p2p::DialSender, - incoming_messages: Receiver, + inner: P2PService, + + // converts incoming hardware challenges to outgoing requests + outgoing_message_tx: Sender, hardware_challenge_rx: Receiver, - cancellation_token: CancellationToken, - context: Context, } impl Service { - #[allow(clippy::too_many_arguments)] pub fn new( - keypair: p2p::Keypair, + keypair: Keypair, port: u16, cancellation_token: CancellationToken, wallet: Wallet, ) -> Result<(Self, Sender)> { - let (node, dial_tx, incoming_messages, outgoing_messages) = - build_p2p_node(keypair, port, cancellation_token.clone()) - .context("failed to build p2p node")?; let (hardware_challenge_tx, hardware_challenge_rx) = 
tokio::sync::mpsc::channel(100); - + let (inner, outgoing_message_tx) = P2PService::new( + keypair, + port, + cancellation_token.clone(), + wallet, + Protocols::new() + .with_hardware_challenge() + .with_validator_authentication(), + ) + .context("failed to create P2P service")?; Ok(( Self { - _node: node, - dial_tx, - incoming_messages, + inner, + outgoing_message_tx, hardware_challenge_rx, - cancellation_token, - context: Context::new(outgoing_messages, wallet), }, hardware_challenge_tx, )) } - pub async fn run(self) { + pub async fn run(self) -> Result<()> { use futures::StreamExt as _; let Self { - _node, - dial_tx, - mut incoming_messages, + inner, + outgoing_message_tx, mut hardware_challenge_rx, - cancellation_token, - context, } = self; - let mut message_handlers = FuturesUnordered::new(); + tokio::task::spawn(inner.run()); + + let mut futures = FuturesUnordered::new(); loop { tokio::select! { - _ = cancellation_token.cancelled() => { - break; - } - Some(message) = hardware_challenge_rx.recv() => { - if let Err(e) = handle_outgoing_hardware_challenge(message, dial_tx.clone(), context.clone()) - .await { - log::error!("failed to handle outgoing hardware challenge: {e}"); - } + Some(request) = hardware_challenge_rx.recv() => { + let (incoming_resp_tx, incoming_resp_rx) = tokio::sync::oneshot::channel(); + let fut = async move { + let resp = match incoming_resp_rx.await.context("outgoing request tx channel was dropped")? 
{ + p2p::Response::HardwareChallenge(resp) => resp.response, + _ => bail!("unexpected response type for hardware challenge request"), + }; + let _ = request.response_tx.send(resp); + Ok(()) + }; + futures.push(fut); + + let outgoing_request = OutgoingRequest { + peer_wallet_address: request.worker_wallet_address, + peer_id: request.worker_p2p_id, + multiaddrs: request.worker_addresses, + request: p2p::HardwareChallengeRequest { + challenge: request.challenge, + timestamp: std::time::SystemTime::now(), + }.into(), + response_tx: incoming_resp_tx, + }; + outgoing_message_tx.send(outgoing_request).await + .context("failed to send outgoing hardware challenge request")?; } - Some(message) = incoming_messages.recv() => { - let context = context.clone(); - let handle = tokio::task::spawn( - handle_incoming_message(message, context) - ); - message_handlers.push(handle); - } - Some(res) = message_handlers.next() => { + Some(res) = futures.next() => { if let Err(e) = res { - log::error!("failed to handle incoming message: {e}"); + log::error!("failed to handle hardware challenge request: {e}"); } } } @@ -89,25 +93,6 @@ impl Service { } } -fn build_p2p_node( - keypair: p2p::Keypair, - port: u16, - cancellation_token: CancellationToken, -) -> Result<( - Node, - p2p::DialSender, - Receiver, - Sender, -)> { - NodeBuilder::new() - .with_keypair(keypair) - .with_port(port) - .with_validator_authentication() - .with_hardware_challenge() - .with_cancellation_token(cancellation_token) - .try_build() -} - pub struct HardwareChallengeRequest { pub(crate) worker_wallet_address: alloy::primitives::Address, pub(crate) worker_p2p_id: String, @@ -115,269 +100,3 @@ pub struct HardwareChallengeRequest { pub(crate) challenge: p2p::ChallengeRequest, pub(crate) response_tx: tokio::sync::oneshot::Sender, } - -#[derive(Clone)] -struct Context { - outgoing_messages: Sender, - ongoing_auth_requests: Arc>>, - ongoing_hardware_challenges: - Arc>>>, - wallet: Wallet, -} - -#[derive(Debug)] -struct 
OngoingAuthChallenge { - worker_wallet_address: alloy::primitives::Address, - auth_challenge_request_message: String, - hardware_challenge: p2p::ChallengeRequest, - hardware_challenge_response_tx: tokio::sync::oneshot::Sender, -} - -impl Context { - fn new(outgoing_messages: Sender, wallet: Wallet) -> Self { - Self { - outgoing_messages, - ongoing_auth_requests: Arc::new(RwLock::new(HashMap::new())), - ongoing_hardware_challenges: Arc::new(RwLock::new(HashMap::new())), - wallet, - } - } -} - -async fn handle_outgoing_hardware_challenge( - request: HardwareChallengeRequest, - dial_tx: p2p::DialSender, - context: Context, -) -> Result<()> { - use rand_v8::rngs::OsRng; - use rand_v8::Rng as _; - use std::str::FromStr as _; - - let HardwareChallengeRequest { - worker_wallet_address, - worker_p2p_id, - worker_addresses, - challenge, - response_tx, - } = request; - - log::debug!( - "sending hardware challenge to {worker_p2p_id} with addresses {worker_addresses:?}" - ); - - // first, dial the worker - let worker_p2p_id = - PeerId::from_str(&worker_p2p_id).context("failed to parse worker p2p id")?; - - // ensure there's no ongoing challenge - // use write-lock to make this atomic until we finish sending the auth request and writing to the map - let mut ongoing_auth_requests = context.ongoing_auth_requests.write().await; - if ongoing_auth_requests.contains_key(&worker_p2p_id) { - bail!( - "ongoing hardware challenge for {} already exists", - worker_p2p_id - ); - } - - let multiaddrs = worker_addresses - .iter() - .filter_map(|addr| { - p2p::Multiaddr::from_str(addr) - .ok()? 
- .with_p2p(worker_p2p_id) - .ok() - }) - .collect::>(); - if multiaddrs.is_empty() { - bail!("no valid multiaddrs for worker p2p id {worker_p2p_id}"); - } - - // TODO: we can improve this by checking if we're already connected to the peer before dialing - let (res_tx, res_rx) = tokio::sync::oneshot::channel(); - dial_tx - .send((multiaddrs, res_tx)) - .await - .context("failed to send dial request")?; - res_rx - .await - .context("failed to receive dial response")? - .context("failed to dial worker")?; - - // create the authentication challenge request message - let challenge_bytes: [u8; 32] = OsRng.gen(); - let auth_challenge_message: String = hex::encode(challenge_bytes); - - let req: p2p::Request = ValidatorAuthenticationInitiationRequest { - message: auth_challenge_message.clone(), - } - .into(); - let outgoing_message = req.into_outgoing_message(worker_p2p_id); - log::debug!( - "sending ValidatorAuthenticationInitiationRequest to {worker_p2p_id}" - ); - context - .outgoing_messages - .send(outgoing_message) - .await - .context("failed to send outgoing message")?; - - // store the ongoing hardware challenge - let ongoing_challenge = OngoingAuthChallenge { - worker_wallet_address, - auth_challenge_request_message: auth_challenge_message.clone(), - hardware_challenge: challenge, - hardware_challenge_response_tx: response_tx, - }; - - ongoing_auth_requests.insert(worker_p2p_id, ongoing_challenge); - Ok(()) -} - -async fn handle_incoming_message(message: IncomingMessage, context: Context) -> Result<()> { - match message.message { - Libp2pIncomingMessage::Request { - request_id: _, - request, - channel: _, - } => { - log::error!( - "validator should not receive incoming requests: {request:?} from {}", - message.peer - ); - } - Libp2pIncomingMessage::Response { - request_id: _, - response, - } => { - log::debug!("received incoming response {response:?}"); - handle_incoming_response(message.peer, response, context) - .await - .context("failed to handle incoming 
response")?; - } - } - Ok(()) -} - -async fn handle_incoming_response( - from: PeerId, - response: p2p::Response, - context: Context, -) -> Result<()> { - match response { - p2p::Response::ValidatorAuthentication(resp) => { - log::debug!("received ValidatorAuthenticationSolutionResponse from {from}: {resp:?}"); - handle_validation_authentication_response(from, resp, context) - .await - .context("failed to handle validator authentication response")?; - } - p2p::Response::HardwareChallenge(resp) => { - log::debug!("received HardwareChallengeResponse from {from}: {resp:?}"); - let mut ongoing_hardware_challenges = context.ongoing_hardware_challenges.write().await; - let Some(response_tx) = ongoing_hardware_challenges.remove(&from) else { - bail!( - "no ongoing hardware challenge for peer {from}, cannot handle HardwareChallengeResponse" - ); - }; - let _ = response_tx.send(resp.response); // timestamp is silently dropped, is it actually used anywhere? - } - p2p::Response::Invite(_) => { - log::error!("validator should not receive `Invite` responses: from {from}"); - } - p2p::Response::GetTaskLogs(_) => { - log::error!("validator should not receive `GetTaskLogs` responses: from {from}"); - } - p2p::Response::Restart(_) => { - log::error!("validator should not receive `Restart` responses: from {from}"); - } - p2p::Response::General(_) => { - todo!() - } - } - - Ok(()) -} - -async fn handle_validation_authentication_response( - from: PeerId, - response: p2p::ValidatorAuthenticationResponse, - context: Context, -) -> Result<()> { - use shared::security::request_signer::sign_message; - use std::str::FromStr as _; - - match response { - ValidatorAuthenticationResponse::Initiation(req) => { - let ongoing_auth_requests = context.ongoing_auth_requests.read().await; - let Some(ongoing_challenge) = ongoing_auth_requests.get(&from) else { - bail!( - "no ongoing hardware challenge for peer {from}, cannot handle ValidatorAuthenticationInitiationResponse" - ); - }; - - let 
Ok(parsed_signature) = alloy::primitives::Signature::from_str(&req.signature) - else { - bail!("Failed to parse signature from server"); - }; - - // recover address from the challenge message that the server signed - let Ok(recovered_address) = parsed_signature - .recover_address_from_msg(&ongoing_challenge.auth_challenge_request_message) - else { - bail!("Failed to recover address from server signature") - }; - - // verify the recovered address matches the expected worker wallet address - if recovered_address != ongoing_challenge.worker_wallet_address { - bail!( - "Server address verification failed: expected {}, got {recovered_address}", - ongoing_challenge.worker_wallet_address, - ) - } - - log::debug!("auth challenge initiation response received from node: {from}"); - let signature = sign_message(&req.message, &context.wallet).await.unwrap(); - - let req: p2p::Request = ValidatorAuthenticationSolutionRequest { signature }.into(); - let req = req.into_outgoing_message(from); - context - .outgoing_messages - .send(req) - .await - .context("failed to send outgoing message")?; - } - ValidatorAuthenticationResponse::Solution(req) => { - let mut ongoing_auth_requests = context.ongoing_auth_requests.write().await; - let Some(ongoing_challenge) = ongoing_auth_requests.remove(&from) else { - bail!( - "no ongoing hardware challenge for peer {from}, cannot handle ValidatorAuthenticationSolutionResponse" - ); - }; - - match req { - p2p::ValidatorAuthenticationSolutionResponse::Granted => {} - p2p::ValidatorAuthenticationSolutionResponse::Rejected => { - log::debug!("auth challenge rejected by node: {from}"); - return Ok(()); - } - } - - // auth was granted, finally send the hardware challenge - let req: p2p::Request = p2p::HardwareChallengeRequest { - challenge: ongoing_challenge.hardware_challenge, - timestamp: std::time::SystemTime::now(), - } - .into(); - let req = req.into_outgoing_message(from); - context - .outgoing_messages - .send(req) - .await - .context("failed 
to send outgoing message")?; - - let mut ongoing_hardware_challenges = context.ongoing_hardware_challenges.write().await; - ongoing_hardware_challenges - .insert(from, ongoing_challenge.hardware_challenge_response_tx); - } - } - Ok(()) -} diff --git a/crates/validator/src/validators/hardware_challenge.rs b/crates/validator/src/validators/hardware_challenge.rs index de04a96a..f84e1dea 100644 --- a/crates/validator/src/validators/hardware_challenge.rs +++ b/crates/validator/src/validators/hardware_challenge.rs @@ -53,6 +53,7 @@ impl HardwareChallenge { challenge: challenge_with_timestamp, response_tx, }; + // Send challenge via P2P self.challenge_tx .send(hardware_challenge) From 0046fac172db48c3cbb843ade33385b70775579d Mon Sep 17 00:00:00 2001 From: elizabeth Date: Thu, 10 Jul 2025 18:45:26 -0400 Subject: [PATCH 25/38] implement orchestrator p2p service --- Cargo.lock | 1 + crates/orchestrator/Cargo.toml | 5 +- crates/orchestrator/src/api/tests/helper.rs | 8 +- crates/orchestrator/src/lib.rs | 1 + crates/orchestrator/src/p2p/mod.rs | 174 ++++++++++++++++++ crates/p2p/Cargo.toml | 2 +- crates/validator/src/p2p/mod.rs | 6 +- .../src/validators/hardware_challenge.rs | 2 +- 8 files changed, 189 insertions(+), 10 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c54a0ad1..1bcee1d6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6234,6 +6234,7 @@ dependencies = [ "serde_json", "shared", "tokio", + "tokio-util", "url", "utoipa", "utoipa-swagger-ui", diff --git a/crates/orchestrator/Cargo.toml b/crates/orchestrator/Cargo.toml index 6ac53140..cf31fdf5 100644 --- a/crates/orchestrator/Cargo.toml +++ b/crates/orchestrator/Cargo.toml @@ -7,6 +7,9 @@ edition.workspace = true workspace = true [dependencies] +p2p = { workspace = true} +shared = { workspace = true } + actix-web = { workspace = true } actix-web-prometheus = "0.1.2" alloy = { workspace = true } @@ -28,8 +31,8 @@ redis-test = { workspace = true } reqwest = { workspace = true } serde = { workspace = true } 
serde_json = { workspace = true } -shared = { workspace = true } tokio = { workspace = true } +tokio-util = { workspace = true } url = { workspace = true } utoipa = { version = "5.3.0", features = ["actix_extras", "chrono", "uuid"] } utoipa-swagger-ui = { version = "9.0.2", features = ["actix-web", "debug-embed", "reqwest", "vendored"] } diff --git a/crates/orchestrator/src/api/tests/helper.rs b/crates/orchestrator/src/api/tests/helper.rs index ca2e65c1..a5282b3a 100644 --- a/crates/orchestrator/src/api/tests/helper.rs +++ b/crates/orchestrator/src/api/tests/helper.rs @@ -18,7 +18,7 @@ use std::sync::Arc; use url::Url; #[cfg(test)] -pub async fn create_test_app_state() -> Data { +pub(crate) async fn create_test_app_state() -> Data { use shared::utils::MockStorageProvider; use crate::{ @@ -69,7 +69,7 @@ pub async fn create_test_app_state() -> Data { } #[cfg(test)] -pub async fn create_test_app_state_with_nodegroups() -> Data { +pub(crate) async fn create_test_app_state_with_nodegroups() -> Data { use shared::utils::MockStorageProvider; use crate::{ @@ -139,7 +139,7 @@ pub async fn create_test_app_state_with_nodegroups() -> Data { } #[cfg(test)] -pub fn setup_contract() -> Contracts { +pub(crate) fn setup_contract() -> Contracts { let coordinator_key = "0xdbda1821b80551c9d65939329250298aa3472ba22feea921c0cf5d620ea67b97"; let rpc_url: Url = Url::parse("http://localhost:8545").unwrap(); let wallet = Wallet::new(coordinator_key, rpc_url).unwrap(); @@ -154,7 +154,7 @@ pub fn setup_contract() -> Contracts { } #[cfg(test)] -pub async fn create_test_app_state_with_metrics() -> Data { +pub(crate) async fn create_test_app_state_with_metrics() -> Data { use shared::utils::MockStorageProvider; use crate::{ diff --git a/crates/orchestrator/src/lib.rs b/crates/orchestrator/src/lib.rs index 5f82d58d..a1d41f39 100644 --- a/crates/orchestrator/src/lib.rs +++ b/crates/orchestrator/src/lib.rs @@ -17,6 +17,7 @@ pub use metrics::webhook_sender::MetricsWebhookSender; pub use 
metrics::MetricsContext; pub use node::invite::NodeInviter; pub use p2p::client::P2PClient; +pub use p2p::Service as P2PService; pub use plugins::node_groups::NodeGroupConfiguration; pub use plugins::node_groups::NodeGroupsPlugin; pub use plugins::webhook::WebhookConfig; diff --git a/crates/orchestrator/src/p2p/mod.rs b/crates/orchestrator/src/p2p/mod.rs index 1d331315..63aa2192 100644 --- a/crates/orchestrator/src/p2p/mod.rs +++ b/crates/orchestrator/src/p2p/mod.rs @@ -1 +1,175 @@ pub(crate) mod client; + +use anyhow::{bail, Context as _, Result}; +use futures::stream::FuturesUnordered; +use futures::FutureExt; +use p2p::{Keypair, Protocols}; +use shared::p2p::OutgoingRequest; +use shared::p2p::Service as P2PService; +use shared::web3::wallet::Wallet; +use tokio::sync::mpsc::{Receiver, Sender}; +use tokio_util::sync::CancellationToken; + +pub struct Service { + inner: P2PService, + outgoing_message_tx: Sender, + invite_rx: Receiver, + get_task_logs_rx: Receiver, + restart_task_rx: Receiver, +} + +impl Service { + pub fn new( + keypair: Keypair, + port: u16, + cancellation_token: CancellationToken, + wallet: Wallet, + ) -> Result<( + Self, + Sender, + Sender, + Sender, + )> { + let (invite_tx, invite_rx) = tokio::sync::mpsc::channel(100); + let (get_task_logs_tx, get_task_logs_rx) = tokio::sync::mpsc::channel(100); + let (restart_task_tx, restart_task_rx) = tokio::sync::mpsc::channel(100); + let (inner, outgoing_message_tx) = P2PService::new( + keypair, + port, + cancellation_token.clone(), + wallet, + Protocols::new() + .with_invite() + .with_get_task_logs() + .with_restart() + .with_validator_authentication(), + ) + .context("failed to create p2p service")?; + Ok(( + Self { + inner, + outgoing_message_tx, + invite_rx, + get_task_logs_rx, + restart_task_rx, + }, + invite_tx, + get_task_logs_tx, + restart_task_tx, + )) + } + + pub async fn run(self) -> Result<()> { + use futures::StreamExt as _; + + let Self { + inner, + outgoing_message_tx, + mut invite_rx, + mut 
get_task_logs_rx, + mut restart_task_rx, + } = self; + + tokio::task::spawn(inner.run()); + + let mut futures = FuturesUnordered::new(); + + loop { + tokio::select! { + Some(request) = invite_rx.recv() => { + let (incoming_resp_tx, incoming_resp_rx) = tokio::sync::oneshot::channel(); + let fut = async move { + let resp = match incoming_resp_rx.await.context("outgoing request tx channel was dropped")? { + p2p::Response::Invite(resp) => resp, + _ => bail!("unexpected response type for invite request"), + }; + request.response_tx.send(resp).map_err(|_|anyhow::anyhow!("caller dropped response channel"))?; + Ok(()) + }.boxed(); + futures.push(fut); + + let outgoing_request = OutgoingRequest { + peer_wallet_address: request.worker_wallet_address, + peer_id: request.worker_p2p_id, + multiaddrs: request.worker_addresses, + request: request.invite.into(), + response_tx: incoming_resp_tx, + }; + outgoing_message_tx.send(outgoing_request).await + .context("failed to send outgoing invite request")?; + } + Some(request) = get_task_logs_rx.recv() => { + let (incoming_resp_tx, incoming_resp_rx) = tokio::sync::oneshot::channel(); + let fut = async move { + let resp = match incoming_resp_rx.await.context("outgoing request tx channel was dropped")? 
{ + p2p::Response::GetTaskLogs(resp) => resp, + _ => bail!("unexpected response type for get task logs request"), + }; + request.response_tx.send(resp).map_err(|_|anyhow::anyhow!("caller dropped response channel"))?; + Ok(()) + }.boxed(); + futures.push(fut); + + let outgoing_request = OutgoingRequest { + peer_wallet_address: request.worker_wallet_address, + peer_id: request.worker_p2p_id, + multiaddrs: request.worker_addresses, + request: p2p::Request::GetTaskLogs.into(), + response_tx: incoming_resp_tx, + }; + outgoing_message_tx.send(outgoing_request).await + .context("failed to send outgoing get task logs request")?; + } + Some(request) = restart_task_rx.recv() => { + let (incoming_resp_tx, incoming_resp_rx) = tokio::sync::oneshot::channel(); + let fut = async move { + let resp = match incoming_resp_rx.await.context("outgoing request tx channel was dropped")? { + p2p::Response::Restart(resp) => resp, + _ => bail!("unexpected response type for restart task request"), + }; + request.response_tx.send(resp).map_err(|_|anyhow::anyhow!("caller dropped response channel"))?; + Ok(()) + }.boxed(); + futures.push(fut); + + let outgoing_request = OutgoingRequest { + peer_wallet_address: request.worker_wallet_address, + peer_id: request.worker_p2p_id, + multiaddrs: request.worker_addresses, + request: p2p::Request::Restart.into(), + response_tx: incoming_resp_tx, + }; + outgoing_message_tx.send(outgoing_request).await + .context("failed to send outgoing restart task request")?; + } + Some(res) = futures.next() => { + if let Err(e) = res { + log::error!("failed to handle response conversion: {e}"); + } + } + } + } + } +} + +pub struct InviteRequest { + pub(crate) worker_wallet_address: alloy::primitives::Address, + pub(crate) worker_p2p_id: String, + pub(crate) worker_addresses: Vec, + pub(crate) invite: p2p::InviteRequest, + pub(crate) response_tx: tokio::sync::oneshot::Sender, +} + +pub struct GetTaskLogsRequest { + pub(crate) worker_wallet_address: 
alloy::primitives::Address, + pub(crate) worker_p2p_id: String, + pub(crate) worker_addresses: Vec, + pub(crate) response_tx: tokio::sync::oneshot::Sender, +} + +pub struct RestartTaskRequest { + pub(crate) worker_wallet_address: alloy::primitives::Address, + pub(crate) worker_p2p_id: String, + pub(crate) worker_addresses: Vec, + pub(crate) response_tx: tokio::sync::oneshot::Sender, +} diff --git a/crates/p2p/Cargo.toml b/crates/p2p/Cargo.toml index ba52d570..dc5efc40 100644 --- a/crates/p2p/Cargo.toml +++ b/crates/p2p/Cargo.toml @@ -11,7 +11,7 @@ anyhow = {workspace = true} nalgebra = {workspace = true} serde = {workspace = true} tokio = {workspace = true, features = ["sync"]} -tokio-util = { workspace = true, features = ["rt"] } +tokio-util = { workspace = true } [lints] workspace = true diff --git a/crates/validator/src/p2p/mod.rs b/crates/validator/src/p2p/mod.rs index fcce43ec..dc6b23e6 100644 --- a/crates/validator/src/p2p/mod.rs +++ b/crates/validator/src/p2p/mod.rs @@ -32,7 +32,7 @@ impl Service { .with_hardware_challenge() .with_validator_authentication(), ) - .context("failed to create P2P service")?; + .context("failed to create p2p service")?; Ok(( Self { inner, @@ -65,7 +65,7 @@ impl Service { p2p::Response::HardwareChallenge(resp) => resp.response, _ => bail!("unexpected response type for hardware challenge request"), }; - let _ = request.response_tx.send(resp); + request.response_tx.send(resp).map_err(|_|anyhow::anyhow!("caller dropped response channel"))?; Ok(()) }; futures.push(fut); @@ -85,7 +85,7 @@ impl Service { } Some(res) = futures.next() => { if let Err(e) = res { - log::error!("failed to handle hardware challenge request: {e}"); + log::error!("failed to handle response conversion: {e}"); } } } diff --git a/crates/validator/src/validators/hardware_challenge.rs b/crates/validator/src/validators/hardware_challenge.rs index f84e1dea..6970355d 100644 --- a/crates/validator/src/validators/hardware_challenge.rs +++ 
b/crates/validator/src/validators/hardware_challenge.rs @@ -53,7 +53,7 @@ impl HardwareChallenge { challenge: challenge_with_timestamp, response_tx, }; - + // Send challenge via P2P self.challenge_tx .send(hardware_challenge) From 08a10ec0377c15204b69dac94ea09ff5baed3bfb Mon Sep 17 00:00:00 2001 From: elizabeth Date: Thu, 10 Jul 2025 19:16:18 -0400 Subject: [PATCH 26/38] update orchestrator to use libp2p node --- crates/orchestrator/src/api/routes/groups.rs | 24 +++-- crates/orchestrator/src/api/routes/nodes.rs | 42 ++++++-- crates/orchestrator/src/api/server.rs | 32 +++--- crates/orchestrator/src/api/tests/helper.rs | 42 +++----- crates/orchestrator/src/discovery/monitor.rs | 7 +- crates/orchestrator/src/lib.rs | 1 - crates/orchestrator/src/main.rs | 73 +++++++++---- crates/orchestrator/src/node/invite.rs | 82 ++++++++------- crates/orchestrator/src/p2p/client.rs | 102 ------------------- crates/orchestrator/src/p2p/mod.rs | 22 ++-- 10 files changed, 192 insertions(+), 235 deletions(-) delete mode 100644 crates/orchestrator/src/p2p/client.rs diff --git a/crates/orchestrator/src/api/routes/groups.rs b/crates/orchestrator/src/api/routes/groups.rs index 44b22cd9..414f524a 100644 --- a/crates/orchestrator/src/api/routes/groups.rs +++ b/crates/orchestrator/src/api/routes/groups.rs @@ -236,9 +236,6 @@ async fn fetch_node_logs_p2p( match node { Some(node) => { - // Check if P2P client is available - let p2p_client = app_state.p2p_client.clone(); - // Check if node has P2P information let (worker_p2p_id, worker_p2p_addresses) = match (&node.worker_p2p_id, &node.worker_p2p_addresses) { @@ -254,11 +251,22 @@ async fn fetch_node_logs_p2p( }; // Send P2P request for task logs - match tokio::time::timeout( - Duration::from_secs(NODE_REQUEST_TIMEOUT), - p2p_client.get_task_logs(node_address, worker_p2p_id, worker_p2p_addresses), - ) - .await + let (response_tx, response_rx) = tokio::sync::oneshot::channel(); + let get_task_logs_request = crate::p2p::GetTaskLogsRequest { + 
worker_wallet_address: node_address, + worker_p2p_id: worker_p2p_id.clone(), + worker_addresses: worker_p2p_addresses.clone(), + response_tx, + }; + if let Err(e) = app_state.get_task_logs_tx.send(get_task_logs_request).await { + error!("Failed to send GetTaskLogsRequest for node {node_address}: {e}"); + return json!({ + "success": false, + "error": format!("Failed to send request: {}", e), + "status": node.status.to_string() + }); + }; + match tokio::time::timeout(Duration::from_secs(NODE_REQUEST_TIMEOUT), response_rx).await { Ok(Ok(log_lines)) => { json!({ diff --git a/crates/orchestrator/src/api/routes/nodes.rs b/crates/orchestrator/src/api/routes/nodes.rs index a260706a..9debddde 100644 --- a/crates/orchestrator/src/api/routes/nodes.rs +++ b/crates/orchestrator/src/api/routes/nodes.rs @@ -164,11 +164,22 @@ async fn restart_node_task(node_id: web::Path, app_state: Data .as_ref() .expect("worker_p2p_addresses should be present"); - match app_state - .p2p_client - .restart_task(node_address, p2p_id, p2p_addresses) - .await - { + let (response_tx, response_rx) = tokio::sync::oneshot::channel(); + let restart_task_request = crate::p2p::RestartTaskRequest { + worker_wallet_address: node.address, + worker_p2p_id: p2p_id.clone(), + worker_addresses: p2p_addresses.clone(), + response_tx, + }; + if let Err(e) = app_state.restart_task_tx.send(restart_task_request).await { + error!("Failed to send restart task request: {e}"); + return HttpResponse::InternalServerError().json(json!({ + "success": false, + "error": "Failed to send restart task request" + })); + } + + match response_rx.await { Ok(_) => HttpResponse::Ok().json(json!({ "success": true, "message": "Task restarted successfully" @@ -240,11 +251,22 @@ async fn get_node_logs(node_id: web::Path, app_state: Data) -> })); }; - match app_state - .p2p_client - .get_task_logs(node_address, p2p_id, p2p_addresses) - .await - { + let (response_tx, response_rx) = tokio::sync::oneshot::channel(); + let get_task_logs_request = 
crate::p2p::GetTaskLogsRequest { + worker_wallet_address: node.address, + worker_p2p_id: p2p_id.clone(), + worker_addresses: p2p_addresses.clone(), + response_tx, + }; + if let Err(e) = app_state.get_task_logs_tx.send(get_task_logs_request).await { + error!("Failed to send get task logs request: {e}"); + return HttpResponse::InternalServerError().json(json!({ + "success": false, + "error": "Failed to send get task logs request" + })); + } + + match response_rx.await { Ok(logs) => HttpResponse::Ok().json(json!({ "success": true, "logs": logs diff --git a/crates/orchestrator/src/api/server.rs b/crates/orchestrator/src/api/server.rs index 095bcb6c..fc5943c9 100644 --- a/crates/orchestrator/src/api/server.rs +++ b/crates/orchestrator/src/api/server.rs @@ -5,7 +5,7 @@ use crate::api::routes::task::tasks_routes; use crate::api::routes::{heartbeat::heartbeat_routes, metrics::metrics_routes}; use crate::metrics::MetricsContext; use crate::models::node::NodeStatus; -use crate::p2p::client::P2PClient; +use crate::p2p::{GetTaskLogsRequest, RestartTaskRequest}; use crate::plugins::node_groups::NodeGroupsPlugin; use crate::scheduler::Scheduler; use crate::store::core::{RedisStore, StoreContext}; @@ -23,6 +23,7 @@ use shared::utils::StorageProvider; use shared::web3::contracts::core::builder::Contracts; use shared::web3::wallet::WalletProvider; use std::sync::Arc; +use tokio::sync::mpsc::Sender; use utoipa::{ openapi::security::{ApiKey, ApiKeyValue, SecurityScheme}, Modify, OpenApi, @@ -116,17 +117,18 @@ async fn health_check(data: web::Data) -> HttpResponse { } pub(crate) struct AppState { - pub store_context: Arc, - pub storage_provider: Option>, - pub heartbeats: Arc, - pub redis_store: Arc, - pub hourly_upload_limit: i64, - pub contracts: Option>, - pub pool_id: u32, - pub scheduler: Scheduler, - pub node_groups_plugin: Option>, - pub metrics: Arc, - pub p2p_client: Arc, + pub(crate) store_context: Arc, + pub(crate) storage_provider: Option>, + pub(crate) heartbeats: Arc, + 
pub(crate) redis_store: Arc, + pub(crate) hourly_upload_limit: i64, + pub(crate) contracts: Option>, + pub(crate) pool_id: u32, + pub(crate) scheduler: Scheduler, + pub(crate) node_groups_plugin: Option>, + pub(crate) metrics: Arc, + pub(crate) get_task_logs_tx: Sender, + pub(crate) restart_task_tx: Sender, } #[allow(clippy::too_many_arguments)] @@ -145,7 +147,8 @@ pub async fn start_server( scheduler: Scheduler, node_groups_plugin: Option>, metrics: Arc, - p2p_client: Arc, + get_task_logs_tx: Sender, + restart_task_tx: Sender, ) -> Result<(), Error> { info!("Starting server at http://{host}:{port}"); let app_state = Data::new(AppState { @@ -159,7 +162,8 @@ pub async fn start_server( scheduler, node_groups_plugin, metrics, - p2p_client, + get_task_logs_tx, + restart_task_tx, }); let node_store = app_state.store_context.node_store.clone(); let node_store_clone = node_store.clone(); diff --git a/crates/orchestrator/src/api/tests/helper.rs b/crates/orchestrator/src/api/tests/helper.rs index a5282b3a..92b26cce 100644 --- a/crates/orchestrator/src/api/tests/helper.rs +++ b/crates/orchestrator/src/api/tests/helper.rs @@ -22,8 +22,8 @@ pub(crate) async fn create_test_app_state() -> Data { use shared::utils::MockStorageProvider; use crate::{ - metrics::MetricsContext, p2p::client::P2PClient, scheduler::Scheduler, - utils::loop_heartbeats::LoopHeartbeats, ServerMode, + metrics::MetricsContext, scheduler::Scheduler, utils::loop_heartbeats::LoopHeartbeats, + ServerMode, }; let store = Arc::new(RedisStore::new_test()); @@ -46,12 +46,8 @@ pub(crate) async fn create_test_app_state() -> Data { let mock_storage = MockStorageProvider::new(); let storage_provider = Arc::new(mock_storage); let metrics = Arc::new(MetricsContext::new(1.to_string())); - let wallet = Wallet::new( - "0xdbda1821b80551c9d65939329250298aa3472ba22feea921c0cf5d620ea67b97", - Url::parse("http://localhost:8545").unwrap(), - ) - .unwrap(); - let p2p_client = 
Arc::new(P2PClient::new(wallet.clone()).await.unwrap()); + let (get_task_logs_tx, _) = tokio::sync::mpsc::channel(0); + let (restart_task_tx, _) = tokio::sync::mpsc::channel(0); Data::new(AppState { store_context: store_context.clone(), @@ -64,7 +60,8 @@ pub(crate) async fn create_test_app_state() -> Data { scheduler, node_groups_plugin: None, metrics, - p2p_client: p2p_client.clone(), + get_task_logs_tx, + restart_task_tx, }) } @@ -74,7 +71,6 @@ pub(crate) async fn create_test_app_state_with_nodegroups() -> Data { use crate::{ metrics::MetricsContext, - p2p::client::P2PClient, plugins::node_groups::{NodeGroupConfiguration, NodeGroupsPlugin}, scheduler::Scheduler, utils::loop_heartbeats::LoopHeartbeats, @@ -116,12 +112,8 @@ pub(crate) async fn create_test_app_state_with_nodegroups() -> Data { let mock_storage = MockStorageProvider::new(); let storage_provider = Arc::new(mock_storage); let metrics = Arc::new(MetricsContext::new(1.to_string())); - let wallet = Wallet::new( - "0xdbda1821b80551c9d65939329250298aa3472ba22feea921c0cf5d620ea67b97", - Url::parse("http://localhost:8545").unwrap(), - ) - .unwrap(); - let p2p_client = Arc::new(P2PClient::new(wallet.clone()).await.unwrap()); + let (get_task_logs_tx, _) = tokio::sync::mpsc::channel(0); + let (restart_task_tx, _) = tokio::sync::mpsc::channel(0); Data::new(AppState { store_context: store_context.clone(), @@ -134,7 +126,8 @@ pub(crate) async fn create_test_app_state_with_nodegroups() -> Data { scheduler, node_groups_plugin, metrics, - p2p_client: p2p_client.clone(), + get_task_logs_tx, + restart_task_tx, }) } @@ -158,8 +151,8 @@ pub(crate) async fn create_test_app_state_with_metrics() -> Data { use shared::utils::MockStorageProvider; use crate::{ - metrics::MetricsContext, p2p::client::P2PClient, scheduler::Scheduler, - utils::loop_heartbeats::LoopHeartbeats, ServerMode, + metrics::MetricsContext, scheduler::Scheduler, utils::loop_heartbeats::LoopHeartbeats, + ServerMode, }; let store = 
Arc::new(RedisStore::new_test()); @@ -182,12 +175,8 @@ pub(crate) async fn create_test_app_state_with_metrics() -> Data { let mock_storage = MockStorageProvider::new(); let storage_provider = Arc::new(mock_storage); let metrics = Arc::new(MetricsContext::new("0".to_string())); - let wallet = Wallet::new( - "0xdbda1821b80551c9d65939329250298aa3472ba22feea921c0cf5d620ea67b97", - Url::parse("http://localhost:8545").unwrap(), - ) - .unwrap(); - let p2p_client = Arc::new(P2PClient::new(wallet.clone()).await.unwrap()); + let (get_task_logs_tx, _) = tokio::sync::mpsc::channel(0); + let (restart_task_tx, _) = tokio::sync::mpsc::channel(0); Data::new(AppState { store_context: store_context.clone(), @@ -200,6 +189,7 @@ pub(crate) async fn create_test_app_state_with_metrics() -> Data { scheduler, node_groups_plugin: None, metrics, - p2p_client: p2p_client.clone(), + get_task_logs_tx, + restart_task_tx, }) } diff --git a/crates/orchestrator/src/discovery/monitor.rs b/crates/orchestrator/src/discovery/monitor.rs index 56fed833..d1ea3133 100644 --- a/crates/orchestrator/src/discovery/monitor.rs +++ b/crates/orchestrator/src/discovery/monitor.rs @@ -384,15 +384,12 @@ impl DiscoveryMonitor { if let Some(balance) = discovery_node.latest_balance { if balance == U256::ZERO { - info!( - "Node {} has zero balance, marking as low balance", - node_address - ); + info!("Node {node_address} has zero balance, marking as low balance"); if let Err(e) = self .update_node_status(&node_address, NodeStatus::LowBalance) .await { - error!("Error updating node status: {}", e); + error!("Error updating node status: {e}"); } } } diff --git a/crates/orchestrator/src/lib.rs b/crates/orchestrator/src/lib.rs index a1d41f39..19d13eba 100644 --- a/crates/orchestrator/src/lib.rs +++ b/crates/orchestrator/src/lib.rs @@ -16,7 +16,6 @@ pub use metrics::sync_service::MetricsSyncService; pub use metrics::webhook_sender::MetricsWebhookSender; pub use metrics::MetricsContext; pub use node::invite::NodeInviter; -pub 
use p2p::client::P2PClient; pub use p2p::Service as P2PService; pub use plugins::node_groups::NodeGroupConfiguration; pub use plugins::node_groups::NodeGroupsPlugin; diff --git a/crates/orchestrator/src/main.rs b/crates/orchestrator/src/main.rs index f9beaccb..5f8e2af2 100644 --- a/crates/orchestrator/src/main.rs +++ b/crates/orchestrator/src/main.rs @@ -9,12 +9,13 @@ use shared::web3::contracts::core::builder::ContractBuilder; use shared::web3::wallet::Wallet; use std::sync::Arc; use tokio::task::JoinSet; +use tokio_util::sync::CancellationToken; use url::Url; use orchestrator::{ start_server, DiscoveryMonitor, LoopHeartbeats, MetricsContext, MetricsSyncService, MetricsWebhookSender, NodeGroupConfiguration, NodeGroupsPlugin, NodeInviter, NodeStatusUpdater, - P2PClient, RedisStore, Scheduler, SchedulerPlugin, ServerMode, StatusUpdatePlugin, + P2PService, RedisStore, Scheduler, SchedulerPlugin, ServerMode, StatusUpdatePlugin, StoreContext, WebhookConfig, WebhookPlugin, }; @@ -91,6 +92,10 @@ struct Args { /// Max healthy nodes with same endpoint #[arg(long, default_value = "1")] max_healthy_nodes_with_same_endpoint: u32, + + /// Libp2p port + #[arg(long, default_value = "4004")] + libp2p_port: u16, } #[tokio::main] @@ -143,7 +148,27 @@ async fn main() -> Result<()> { let store = Arc::new(RedisStore::new(&args.redis_store_url)); let store_context = Arc::new(StoreContext::new(store.clone())); - let p2p_client = Arc::new(P2PClient::new(wallet.clone()).await.unwrap()); + let keypair = p2p::Keypair::generate_ed25519(); + let cancellation_token = CancellationToken::new(); + let (p2p_service, invite_tx, get_task_logs_tx, restart_task_tx) = { + match P2PService::new( + keypair, + args.libp2p_port, + cancellation_token.clone(), + wallet.clone(), + ) { + Ok(res) => { + info!("p2p service initialized successfully"); + res + } + Err(e) => { + error!("failed to initialize p2p service: {e}"); + std::process::exit(1); + } + } + }; + + tokio::task::spawn(p2p_service.run()); let 
contracts = ContractBuilder::new(wallet.provider()) .with_compute_registry() @@ -297,24 +322,29 @@ async fn main() -> Result<()> { let inviter_store_context = store_context.clone(); let inviter_heartbeats = heartbeats.clone(); - tasks.spawn({ - let wallet = wallet.clone(); - let p2p_client = p2p_client.clone(); - async move { - let inviter = NodeInviter::new( - wallet, - compute_pool_id, - domain_id, - args.host.as_deref(), - Some(&args.port), - args.url.as_deref(), - inviter_store_context.clone(), - inviter_heartbeats.clone(), - p2p_client, - ); - inviter.run().await + let wallet = wallet.clone(); + let inviter = match NodeInviter::new( + wallet, + compute_pool_id, + domain_id, + args.host.as_deref(), + Some(&args.port), + args.url.as_deref(), + inviter_store_context.clone(), + inviter_heartbeats.clone(), + invite_tx, + ) { + Ok(inviter) => { + info!("Node inviter initialized successfully"); + inviter } - }); + Err(e) => { + error!("Failed to initialize node inviter: {e}"); + std::process::exit(1); + } + }; + + tasks.spawn(async move { inviter.run().await }); // Create status_update_plugins for status updater let mut status_updater_plugins: Vec = vec![]; @@ -387,7 +417,8 @@ async fn main() -> Result<()> { scheduler, node_groups_plugin, metrics_context, - p2p_client, + get_task_logs_tx, + restart_task_tx, ) => { if let Err(e) = res { error!("Server error: {e}"); @@ -403,6 +434,8 @@ async fn main() -> Result<()> { } } + // TODO: use cancellation token to gracefully shutdown tasks + cancellation_token.cancel(); tasks.shutdown().await; Ok(()) } diff --git a/crates/orchestrator/src/node/invite.rs b/crates/orchestrator/src/node/invite.rs index 17ae4207..8391d047 100644 --- a/crates/orchestrator/src/node/invite.rs +++ b/crates/orchestrator/src/node/invite.rs @@ -1,40 +1,40 @@ use crate::models::node::NodeStatus; use crate::models::node::OrchestratorNode; -use crate::p2p::client::P2PClient; +use crate::p2p::InviteRequest as InviteRequestWithMetadata; use 
crate::store::core::StoreContext; use crate::utils::loop_heartbeats::LoopHeartbeats; use alloy::primitives::utils::keccak256 as keccak; use alloy::primitives::U256; use alloy::signers::Signer; -use anyhow::Result; +use anyhow::{bail, Result}; use futures::stream; use futures::StreamExt; use log::{debug, error, info, warn}; -use shared::models::invite::InviteRequest; +use p2p::InviteRequest; +use p2p::InviteRequestUrl; use shared::web3::wallet::Wallet; use std::sync::Arc; use std::time::SystemTime; use std::time::UNIX_EPOCH; +use tokio::sync::mpsc::Sender; use tokio::time::{interval, Duration}; // Timeout constants const DEFAULT_INVITE_CONCURRENT_COUNT: usize = 32; // Max concurrent count of nodes being invited -pub struct NodeInviter<'a> { +pub struct NodeInviter { wallet: Wallet, pool_id: u32, domain_id: u32, - host: Option<&'a str>, - port: Option<&'a u16>, - url: Option<&'a str>, + url: InviteRequestUrl, store_context: Arc, heartbeats: Arc, - p2p_client: Arc, + invite_tx: Sender, } -impl<'a> NodeInviter<'a> { +impl NodeInviter { #[allow(clippy::too_many_arguments)] - pub fn new( + pub fn new<'a>( wallet: Wallet, pool_id: u32, domain_id: u32, @@ -43,19 +43,31 @@ impl<'a> NodeInviter<'a> { url: Option<&'a str>, store_context: Arc, heartbeats: Arc, - p2p_client: Arc, - ) -> Self { - Self { + invite_tx: Sender, + ) -> Result { + let url = if let Some(url) = url { + InviteRequestUrl::MasterUrl(url.to_string()) + } else { + let Some(host) = host else { + bail!("either host or url must be provided"); + }; + + let Some(port) = port else { + bail!("either port or url must be provided"); + }; + + InviteRequestUrl::MasterIpPort(host.to_string(), *port) + }; + + Ok(Self { wallet, pool_id, domain_id, - host, - port, url, store_context, heartbeats, - p2p_client, - } + invite_tx, + }) } pub async fn run(&self) -> Result<()> { @@ -71,7 +83,7 @@ impl<'a> NodeInviter<'a> { } } - async fn _generate_invite( + async fn generate_invite( &self, node: &OrchestratorNode, nonce: [u8; 
32], @@ -102,7 +114,7 @@ impl<'a> NodeInviter<'a> { Ok(signature) } - async fn _send_invite(&self, node: &OrchestratorNode) -> Result<(), anyhow::Error> { + async fn send_invite(&self, node: &OrchestratorNode) -> Result<(), anyhow::Error> { if node.worker_p2p_id.is_none() || node.worker_p2p_addresses.is_none() { return Err(anyhow::anyhow!("Node does not have p2p information")); } @@ -120,21 +132,11 @@ impl<'a> NodeInviter<'a> { ) .to_be_bytes(); - let invite_signature = self._generate_invite(node, nonce, expiration).await?; + let invite_signature = self.generate_invite(node, nonce, expiration).await?; let payload = InviteRequest { invite: hex::encode(invite_signature), pool_id: self.pool_id, - master_url: self.url.map(|u| u.to_string()), - master_ip: if self.url.is_none() { - self.host.map(|h| h.to_string()) - } else { - None - }, - master_port: if self.url.is_none() { - self.port.copied() - } else { - None - }, + url: self.url.clone(), timestamp: SystemTime::now() .duration_since(UNIX_EPOCH) .map_err(|e| anyhow::anyhow!("System time error: {}", e))? 
@@ -145,11 +147,19 @@ impl<'a> NodeInviter<'a> { info!("Sending invite to node: {p2p_id}"); - match self - .p2p_client - .invite_worker(node.address, p2p_id, p2p_addresses, payload) + let (response_tx, response_rx) = tokio::sync::oneshot::channel(); + let invite = InviteRequestWithMetadata { + worker_wallet_address: node.address, + worker_p2p_id: p2p_id.clone(), + worker_addresses: p2p_addresses.clone(), + invite: payload, + response_tx, + }; + self.invite_tx + .send(invite) .await - { + .map_err(|_| anyhow::anyhow!("failed to send invite request"))?; + match response_rx.await { Ok(_) => { info!("Successfully invited node"); if let Err(e) = self @@ -182,7 +192,7 @@ impl<'a> NodeInviter<'a> { let invited_nodes = stream::iter(nodes.into_iter().map(|node| async move { info!("Processing node {:?}", node.address); - match self._send_invite(&node).await { + match self.send_invite(&node).await { Ok(_) => { info!("Successfully processed node {:?}", node.address); Ok(()) diff --git a/crates/orchestrator/src/p2p/client.rs b/crates/orchestrator/src/p2p/client.rs deleted file mode 100644 index 39810151..00000000 --- a/crates/orchestrator/src/p2p/client.rs +++ /dev/null @@ -1,102 +0,0 @@ -use alloy::primitives::Address; -use anyhow::Result; -use log::{info, warn}; -use shared::models::invite::InviteRequest; -use shared::p2p::{client::P2PClient as SharedP2PClient, messages::P2PMessage}; -use shared::web3::wallet::Wallet; - -pub struct P2PClient { - shared_client: SharedP2PClient, -} - -impl P2PClient { - pub async fn new(wallet: Wallet) -> Result { - let shared_client = SharedP2PClient::new(wallet).await?; - Ok(Self { shared_client }) - } - - pub async fn invite_worker( - &self, - worker_wallet_address: Address, - worker_p2p_id: &str, - worker_addresses: &[String], - invite: InviteRequest, - ) -> Result<()> { - let response = self - .shared_client - .send_request( - worker_p2p_id, - worker_addresses, - worker_wallet_address, - P2PMessage::Invite(invite), - 20, - ) - .await?; - - 
match response { - P2PMessage::InviteResponse { status, error } => { - if status == "ok" { - info!("Successfully invited worker {worker_p2p_id}"); - Ok(()) - } else { - let error_msg = error.unwrap_or_else(|| "Unknown error".to_string()); - warn!("Failed to invite worker {worker_p2p_id}: {error_msg}"); - Err(anyhow::anyhow!("Invite failed: {}", error_msg)) - } - } - _ => Err(anyhow::anyhow!("Unexpected response type for invite")), - } - } - - pub async fn get_task_logs( - &self, - worker_wallet_address: Address, - worker_p2p_id: &str, - worker_addresses: &[String], - ) -> Result> { - let response = self - .shared_client - .send_request( - worker_p2p_id, - worker_addresses, - worker_wallet_address, - P2PMessage::GetTaskLogs, - 20, - ) - .await?; - - match response { - P2PMessage::GetTaskLogsResponse { logs } => { - logs.map_err(|e| anyhow::anyhow!("Failed to get task logs: {}", e)) - } - _ => Err(anyhow::anyhow!( - "Unexpected response type for get_task_logs" - )), - } - } - - pub async fn restart_task( - &self, - worker_wallet_address: Address, - worker_p2p_id: &str, - worker_addresses: &[String], - ) -> Result<()> { - let response = self - .shared_client - .send_request( - worker_p2p_id, - worker_addresses, - worker_wallet_address, - P2PMessage::RestartTask, - 25, - ) - .await?; - - match response { - P2PMessage::RestartTaskResponse { result } => { - result.map_err(|e| anyhow::anyhow!("Failed to restart task: {}", e)) - } - _ => Err(anyhow::anyhow!("Unexpected response type for restart_task")), - } - } -} diff --git a/crates/orchestrator/src/p2p/mod.rs b/crates/orchestrator/src/p2p/mod.rs index 63aa2192..f3bf57cf 100644 --- a/crates/orchestrator/src/p2p/mod.rs +++ b/crates/orchestrator/src/p2p/mod.rs @@ -1,5 +1,3 @@ -pub(crate) mod client; - use anyhow::{bail, Context as _, Result}; use futures::stream::FuturesUnordered; use futures::FutureExt; @@ -19,6 +17,7 @@ pub struct Service { } impl Service { + #[allow(clippy::type_complexity)] pub fn new( keypair: Keypair, 
port: u16, @@ -79,9 +78,8 @@ impl Service { Some(request) = invite_rx.recv() => { let (incoming_resp_tx, incoming_resp_rx) = tokio::sync::oneshot::channel(); let fut = async move { - let resp = match incoming_resp_rx.await.context("outgoing request tx channel was dropped")? { - p2p::Response::Invite(resp) => resp, - _ => bail!("unexpected response type for invite request"), + let p2p::Response::Invite(resp) = incoming_resp_rx.await.context("outgoing request tx channel was dropped")? else { + bail!("unexpected response type for invite request"); }; request.response_tx.send(resp).map_err(|_|anyhow::anyhow!("caller dropped response channel"))?; Ok(()) @@ -101,9 +99,8 @@ impl Service { Some(request) = get_task_logs_rx.recv() => { let (incoming_resp_tx, incoming_resp_rx) = tokio::sync::oneshot::channel(); let fut = async move { - let resp = match incoming_resp_rx.await.context("outgoing request tx channel was dropped")? { - p2p::Response::GetTaskLogs(resp) => resp, - _ => bail!("unexpected response type for get task logs request"), + let p2p::Response::GetTaskLogs(resp) = incoming_resp_rx.await.context("outgoing request tx channel was dropped")? else { + bail!("unexpected response type for get task logs request"); }; request.response_tx.send(resp).map_err(|_|anyhow::anyhow!("caller dropped response channel"))?; Ok(()) @@ -114,7 +111,7 @@ impl Service { peer_wallet_address: request.worker_wallet_address, peer_id: request.worker_p2p_id, multiaddrs: request.worker_addresses, - request: p2p::Request::GetTaskLogs.into(), + request: p2p::Request::GetTaskLogs, response_tx: incoming_resp_tx, }; outgoing_message_tx.send(outgoing_request).await @@ -123,9 +120,8 @@ impl Service { Some(request) = restart_task_rx.recv() => { let (incoming_resp_tx, incoming_resp_rx) = tokio::sync::oneshot::channel(); let fut = async move { - let resp = match incoming_resp_rx.await.context("outgoing request tx channel was dropped")? 
{ - p2p::Response::Restart(resp) => resp, - _ => bail!("unexpected response type for restart task request"), + let p2p::Response::Restart(resp) = incoming_resp_rx.await.context("outgoing request tx channel was dropped")? else { + bail!("unexpected response type for restart task request"); }; request.response_tx.send(resp).map_err(|_|anyhow::anyhow!("caller dropped response channel"))?; Ok(()) @@ -136,7 +132,7 @@ impl Service { peer_wallet_address: request.worker_wallet_address, peer_id: request.worker_p2p_id, multiaddrs: request.worker_addresses, - request: p2p::Request::Restart.into(), + request: p2p::Request::Restart, response_tx: incoming_resp_tx, }; outgoing_message_tx.send(outgoing_request).await From ac923ca8a3739304b8cade5353befbf83cc98687 Mon Sep 17 00:00:00 2001 From: elizabeth Date: Thu, 10 Jul 2025 19:19:12 -0400 Subject: [PATCH 27/38] deps cleanup --- Cargo.lock | 6 ------ crates/orchestrator/Cargo.toml | 17 +++++++---------- crates/validator/Cargo.toml | 2 -- 3 files changed, 7 insertions(+), 18 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1bcee1d6..7eac708f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6211,8 +6211,6 @@ dependencies = [ "actix-web-prometheus", "alloy", "anyhow", - "async-trait", - "base64 0.22.1", "chrono", "clap", "env_logger", @@ -6220,12 +6218,10 @@ dependencies = [ "google-cloud-auth 0.18.0", "google-cloud-storage", "hex", - "iroh", "log", "mockito", "p2p", "prometheus 0.14.0", - "rand 0.8.5", "rand 0.9.1", "redis", "redis-test", @@ -9455,10 +9451,8 @@ dependencies = [ "lazy_static", "log", "mockito", - "nalgebra", "p2p", "prometheus 0.14.0", - "rand 0.8.5", "rand 0.9.1", "redis", "redis-test", diff --git a/crates/orchestrator/Cargo.toml b/crates/orchestrator/Cargo.toml index cf31fdf5..ce733ee6 100644 --- a/crates/orchestrator/Cargo.toml +++ b/crates/orchestrator/Cargo.toml @@ -11,21 +11,14 @@ p2p = { workspace = true} shared = { workspace = true } actix-web = { workspace = true } -actix-web-prometheus = "0.1.2" alloy = 
{ workspace = true } anyhow = { workspace = true } -async-trait = "0.1.88" -base64 = "0.22.1" chrono = { workspace = true, features = ["serde"] } clap = { workspace = true } env_logger = { workspace = true } futures = { workspace = true } -google-cloud-auth = "0.18.0" -google-cloud-storage = "0.24.0" hex = { workspace = true } log = { workspace = true } -prometheus = "0.14.0" -rand = "0.9.0" redis = { workspace = true, features = ["tokio-comp"] } redis-test = { workspace = true } reqwest = { workspace = true } @@ -34,11 +27,15 @@ serde_json = { workspace = true } tokio = { workspace = true } tokio-util = { workspace = true } url = { workspace = true } +uuid = { workspace = true } + +actix-web-prometheus = "0.1.2" +google-cloud-auth = "0.18.0" +google-cloud-storage = "0.24.0" +prometheus = "0.14.0" +rand = "0.9.0" utoipa = { version = "5.3.0", features = ["actix_extras", "chrono", "uuid"] } utoipa-swagger-ui = { version = "9.0.2", features = ["actix-web", "debug-embed", "reqwest", "vendored"] } -uuid = { workspace = true } -iroh = { workspace = true } -rand_v8 = { workspace = true } [dev-dependencies] mockito = { workspace = true } diff --git a/crates/validator/Cargo.toml b/crates/validator/Cargo.toml index 76969bb0..4d329921 100644 --- a/crates/validator/Cargo.toml +++ b/crates/validator/Cargo.toml @@ -19,9 +19,7 @@ directories = { workspace = true } env_logger = { workspace = true } futures = { workspace = true } hex = { workspace = true } -rand_v8 = { workspace = true } log = { workspace = true } -nalgebra = { workspace = true } redis = { workspace = true, features = ["tokio-comp"] } reqwest = { workspace = true } serde = { workspace = true } From f35b0012ef0404a69468bc10ce6b4cf1487481cc Mon Sep 17 00:00:00 2001 From: elizabeth Date: Thu, 10 Jul 2025 19:26:43 -0400 Subject: [PATCH 28/38] delete unused code --- Cargo.lock | 33 ---- crates/discovery/src/chainsync/sync.rs | 10 +- crates/p2p/src/protocol.rs | 6 + crates/shared/src/models/challenge.rs | 89 ---------- 
crates/shared/src/models/invite.rs | 20 --- crates/shared/src/models/mod.rs | 2 - crates/shared/src/p2p/client.rs | 237 ------------------------- crates/shared/src/p2p/messages.rs | 101 ----------- crates/shared/src/p2p/mod.rs | 6 - crates/shared/src/p2p/protocol.rs | 5 - crates/shared/src/p2p/service.rs | 1 + crates/validator/src/main.rs | 2 +- 12 files changed, 11 insertions(+), 501 deletions(-) delete mode 100644 crates/shared/src/models/challenge.rs delete mode 100644 crates/shared/src/models/invite.rs delete mode 100644 crates/shared/src/p2p/client.rs delete mode 100644 crates/shared/src/p2p/messages.rs delete mode 100644 crates/shared/src/p2p/protocol.rs diff --git a/Cargo.lock b/Cargo.lock index 153fcfa7..aa99f00e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8010,15 +8010,6 @@ dependencies = [ "syn 2.0.101", ] -[[package]] -name = "serde_spanned" -version = "0.6.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87607cb1398ed59d48732e575a4c28a7a8ebf2454b964fe3f224f2afc07909e1" -dependencies = [ - "serde", -] - [[package]] name = "serde_urlencoded" version = "0.7.1" @@ -8911,26 +8902,11 @@ dependencies = [ "tokio", ] -[[package]] -name = "toml" -version = "0.8.22" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05ae329d1f08c4d17a59bed7ff5b5a769d062e64a62d34a3261b219e62cd5aae" -dependencies = [ - "serde", - "serde_spanned", - "toml_datetime", - "toml_edit", -] - [[package]] name = "toml_datetime" version = "0.6.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3da5db5a963e24bc68be8b17b6fa82814bb22ee8660f192bb182771d498f09a3" -dependencies = [ - "serde", -] [[package]] name = "toml_edit" @@ -8939,19 +8915,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "310068873db2c5b3e7659d2cc35d21855dbafa50d1ce336397c666e3cb08137e" dependencies = [ "indexmap 2.9.0", - "serde", - "serde_spanned", "toml_datetime", - "toml_write", "winnow", ] 
-[[package]] -name = "toml_write" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfb942dfe1d8e29a7ee7fcbde5bd2b9a25fb89aa70caea2eba3bee836ff41076" - [[package]] name = "tower" version = "0.5.2" diff --git a/crates/discovery/src/chainsync/sync.rs b/crates/discovery/src/chainsync/sync.rs index 6101c87a..1120d3cb 100644 --- a/crates/discovery/src/chainsync/sync.rs +++ b/crates/discovery/src/chainsync/sync.rs @@ -155,7 +155,7 @@ async fn sync_single_node( })?; let balance = provider.get_balance(node_address).await.map_err(|e| { - error!("Error retrieving balance for node {}: {}", node_address, e); + error!("Error retrieving balance for node {node_address}: {e}"); anyhow::anyhow!("Failed to retrieve node balance") })?; n.latest_balance = Some(balance); @@ -166,8 +166,7 @@ async fn sync_single_node( .await .map_err(|e| { error!( - "Error retrieving node info for provider {} and node {}: {}", - provider_address, node_address, e + "Error retrieving node info for provider {provider_address} and node {node_address}: {e}" ); anyhow::anyhow!("Failed to retrieve node info") })?; @@ -177,10 +176,7 @@ async fn sync_single_node( .get_provider(provider_address) .await .map_err(|e| { - error!( - "Error retrieving provider info for {}: {}", - provider_address, e - ); + error!("Error retrieving provider info for {provider_address}: {e}"); anyhow::anyhow!("Failed to retrieve provider info") })?; diff --git a/crates/p2p/src/protocol.rs b/crates/p2p/src/protocol.rs index ae839cec..0956ef0f 100644 --- a/crates/p2p/src/protocol.rs +++ b/crates/p2p/src/protocol.rs @@ -35,6 +35,12 @@ impl Protocol { #[derive(Debug, Clone)] pub struct Protocols(HashSet); +impl Default for Protocols { + fn default() -> Self { + Self::new() + } +} + impl Protocols { pub fn new() -> Self { Self(HashSet::new()) diff --git a/crates/shared/src/models/challenge.rs b/crates/shared/src/models/challenge.rs deleted file mode 100644 index 639cc602..00000000 --- 
a/crates/shared/src/models/challenge.rs +++ /dev/null @@ -1,89 +0,0 @@ -use nalgebra::DMatrix; -use serde::{ - de::{self, Visitor}, - Deserialize, Deserializer, Serialize, Serializer, -}; -use std::fmt; - -#[derive(Debug, Clone)] -pub struct FixedF64(pub f64); - -impl Serialize for FixedF64 { - fn serialize(&self, serializer: S) -> Result - where - S: Serializer, - { - // adjust precision as needed - serializer.serialize_str(&format!("{:.12}", self.0)) - } -} - -impl<'de> Deserialize<'de> for FixedF64 { - fn deserialize(deserializer: D) -> Result - where - D: Deserializer<'de>, - { - struct FixedF64Visitor; - - impl Visitor<'_> for FixedF64Visitor { - type Value = FixedF64; - - fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - formatter.write_str("a string representing a fixed precision float") - } - - fn visit_str(self, value: &str) -> Result - where - E: de::Error, - { - value - .parse::() - .map(FixedF64) - .map_err(|_| E::custom(format!("invalid f64: {value}"))) - } - } - - deserializer.deserialize_str(FixedF64Visitor) - } -} - -impl PartialEq for FixedF64 { - fn eq(&self, other: &Self) -> bool { - format!("{:.10}", self.0) == format!("{:.10}", other.0) - } -} - -#[derive(Deserialize, Serialize, Debug, Clone, PartialEq)] -pub struct ChallengeRequest { - pub rows_a: usize, - pub cols_a: usize, - pub data_a: Vec, - pub rows_b: usize, - pub cols_b: usize, - pub data_b: Vec, - pub timestamp: Option, -} - -#[derive(Deserialize, Serialize, Debug, Clone, PartialEq)] -pub struct ChallengeResponse { - pub result: Vec, - pub rows: usize, - pub cols: usize, -} - -pub fn calc_matrix(req: &ChallengeRequest) -> ChallengeResponse { - // convert FixedF64 to f64 - let data_a: Vec = req.data_a.iter().map(|x| x.0).collect(); - let data_b: Vec = req.data_b.iter().map(|x| x.0).collect(); - let a = DMatrix::from_vec(req.rows_a, req.cols_a, data_a); - let b = DMatrix::from_vec(req.rows_b, req.cols_b, data_b); - let c = a * b; - - let data_c: Vec = 
c.iter().map(|x| FixedF64(*x)).collect(); - - ChallengeResponse { - rows: c.nrows(), - cols: c.ncols(), - result: data_c, - } -} diff --git a/crates/shared/src/models/invite.rs b/crates/shared/src/models/invite.rs deleted file mode 100644 index 08cf2a5e..00000000 --- a/crates/shared/src/models/invite.rs +++ /dev/null @@ -1,20 +0,0 @@ -use serde::Deserialize; -use serde::Serialize; - -#[derive(Deserialize, Serialize, Debug, Clone, PartialEq)] -pub struct InviteRequest { - pub invite: String, - pub pool_id: u32, - // Either master url or ip and port - pub master_url: Option, - pub master_ip: Option, - pub master_port: Option, - pub timestamp: u64, - pub expiration: [u8; 32], - pub nonce: [u8; 32], -} - -#[derive(Deserialize, Serialize)] -pub struct InviteResponse { - pub status: String, -} diff --git a/crates/shared/src/models/mod.rs b/crates/shared/src/models/mod.rs index 0bbe8968..dea669b3 100644 --- a/crates/shared/src/models/mod.rs +++ b/crates/shared/src/models/mod.rs @@ -1,7 +1,5 @@ pub mod api; -pub mod challenge; pub mod heartbeat; -pub mod invite; pub mod metric; pub mod node; pub mod storage; diff --git a/crates/shared/src/p2p/client.rs b/crates/shared/src/p2p/client.rs deleted file mode 100644 index 54e6de45..00000000 --- a/crates/shared/src/p2p/client.rs +++ /dev/null @@ -1,237 +0,0 @@ -use alloy::primitives::Address; -use anyhow::Result; -use iroh::endpoint::{RecvStream, SendStream}; -use iroh::{Endpoint, NodeAddr, NodeId, RelayMode, SecretKey}; -use log::{debug, info}; -use std::str::FromStr; -use std::time::Duration; - -use crate::p2p::messages::{P2PMessage, P2PRequest, P2PResponse}; -use crate::p2p::protocol::PRIME_P2P_PROTOCOL; -use crate::security::request_signer::sign_message; -use crate::web3::wallet::Wallet; -use rand_v8::rngs::OsRng; -use rand_v8::Rng; - -pub struct P2PClient { - endpoint: Endpoint, - node_id: NodeId, - wallet: Wallet, -} - -impl P2PClient { - pub async fn new(wallet: Wallet) -> Result { - let mut rng = rand_v8::thread_rng(); - 
let secret_key = SecretKey::generate(&mut rng); - let node_id = secret_key.public(); - - let endpoint = Endpoint::builder() - .secret_key(secret_key) - .alpns(vec![PRIME_P2P_PROTOCOL.to_vec()]) - .relay_mode(RelayMode::Default) - .discovery_n0() - .bind() - .await?; - - info!("P2P client initialized with node ID: {node_id}"); - - Ok(Self { - endpoint, - node_id, - wallet, - }) - } - - pub fn node_id(&self) -> NodeId { - self.node_id - } - - pub fn endpoint(&self) -> &Endpoint { - &self.endpoint - } - - /// Helper function to write a message with length prefix - async fn write_message(send: &mut SendStream, message: &T) -> Result<()> { - let message_bytes = serde_json::to_vec(message)?; - send.write_all(&(message_bytes.len() as u32).to_be_bytes()) - .await?; - send.write_all(&message_bytes).await?; - Ok(()) - } - - /// Helper function to read a message with length prefix - async fn read_message(recv: &mut RecvStream) -> Result { - let mut len_bytes = [0u8; 4]; - recv.read_exact(&mut len_bytes).await?; - let len = u32::from_be_bytes(len_bytes) as usize; - - let mut message_bytes = vec![0u8; len]; - recv.read_exact(&mut message_bytes).await?; - - let message: T = serde_json::from_slice(&message_bytes)?; - Ok(message) - } - - pub async fn send_request( - &self, - target_p2p_id: &str, - target_addresses: &[String], - target_wallet_address: Address, - message: P2PMessage, - timeout_secs: u64, - ) -> Result { - let timeout_duration = Duration::from_secs(timeout_secs); - - tokio::time::timeout(timeout_duration, async { - self.send_request_inner( - target_p2p_id, - target_addresses, - target_wallet_address, - message, - ) - .await - }) - .await - .map_err(|_| { - anyhow::anyhow!( - "P2P request to {} timed out after {}s", - target_p2p_id, - timeout_secs - ) - })? 
- } - - async fn send_request_inner( - &self, - target_p2p_id: &str, - target_addresses: &[String], - target_wallet_address: Address, - message: P2PMessage, - ) -> Result { - // Parse target node ID - let node_id = NodeId::from_str(target_p2p_id)?; - - let mut socket_addrs = Vec::new(); - for addr in target_addresses { - if let Ok(socket_addr) = addr.parse() { - socket_addrs.push(socket_addr); - } - } - - if socket_addrs.is_empty() { - return Err(anyhow::anyhow!( - "No valid addresses provided for target node" - )); - } - - // Create node address - let node_addr = NodeAddr::new(node_id).with_direct_addresses(socket_addrs); - - debug!("Connecting to P2P node: {target_p2p_id}"); - - // Connect to the target node - let connection = self.endpoint.connect(node_addr, PRIME_P2P_PROTOCOL).await?; - - let (mut send, mut recv) = connection.open_bi().await?; - - // First request an auth challenge - let challenge_bytes: [u8; 32] = OsRng.gen(); - let challenge_message: String = hex::encode(challenge_bytes); - - let request_auth_challenge = P2PRequest::new(P2PMessage::RequestAuthChallenge { - message: challenge_message.clone(), - }); - Self::write_message(&mut send, &request_auth_challenge).await?; - - // Response contains the auth challenge we have to solve (to show we are the right node) - let auth_challenge_response: P2PResponse = Self::read_message(&mut recv).await?; - let auth_challenge_solution: P2PRequest = match auth_challenge_response.message { - P2PMessage::AuthChallenge { - signed_message, - message, - } => { - // Parse the signature from the server - let Ok(parsed_signature) = alloy::primitives::Signature::from_str(&signed_message) - else { - return Err(anyhow::anyhow!("Failed to parse signature from server")); - }; - - // Recover address from the challenge message that the server signed - let Ok(recovered_address) = - parsed_signature.recover_address_from_msg(&challenge_message) - else { - return Err(anyhow::anyhow!( - "Failed to recover address from server 
signature" - )); - }; - - // Verify the recovered address matches the expected target wallet address - if recovered_address != target_wallet_address { - return Err(anyhow::anyhow!( - "Server address verification failed: expected {}, got {}", - target_wallet_address, - recovered_address - )); - } - - debug!("Auth challenge received from node: {target_p2p_id}"); - let signature = sign_message(&message, &self.wallet).await.unwrap(); - P2PRequest::new(P2PMessage::AuthSolution { - signed_message: signature, - }) - } - _ => { - return Err(anyhow::anyhow!( - "Expected auth challenge, got different message type" - )); - } - }; - Self::write_message(&mut send, &auth_challenge_solution).await?; - - // Check if we are granted or rejected - let auth_response: P2PResponse = Self::read_message(&mut recv).await?; - match auth_response.message { - P2PMessage::AuthGranted { .. } => { - debug!("Auth granted with node: {target_p2p_id}"); - } - P2PMessage::AuthRejected { .. } => { - debug!("Auth rejected with node: {target_p2p_id}"); - return Err(anyhow::anyhow!( - "Auth rejected with node: {}", - target_p2p_id - )); - } - _ => { - return Err(anyhow::anyhow!( - "Expected auth response, got different message type" - )); - } - } - - // Now send the actual request - let request = P2PRequest::new(message); - Self::write_message(&mut send, &request).await?; - - // Read response - let response: P2PResponse = Self::read_message(&mut recv).await?; - - tokio::time::sleep(Duration::from_millis(50)).await; - - send.finish()?; - - Ok(response.message) - } - - /// Shutdown the P2P client gracefully - pub async fn shutdown(self) -> Result<()> { - info!("Shutting down P2P client with node ID: {}", self.node_id); - self.endpoint.close().await; - Ok(()) - } -} - -impl Drop for P2PClient { - fn drop(&mut self) { - debug!("P2P client dropped for node ID: {}", self.node_id); - } -} diff --git a/crates/shared/src/p2p/messages.rs b/crates/shared/src/p2p/messages.rs deleted file mode 100644 index 
1624686a..00000000 --- a/crates/shared/src/p2p/messages.rs +++ /dev/null @@ -1,101 +0,0 @@ -use crate::models::challenge::{ChallengeRequest, ChallengeResponse}; -use crate::models::invite::InviteRequest; -use serde::{Deserialize, Serialize}; -use std::time::SystemTime; - -/// Maximum message size for P2P communication (1MB) -pub const MAX_MESSAGE_SIZE: usize = 1024 * 1024; - -/// P2P message types for validator-worker communication -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] -#[serde(tag = "type", content = "payload")] -pub enum P2PMessage { - /// Request auth challenge from worker to validator - RequestAuthChallenge { message: String }, - - /// Auth challenge from worker to validator - AuthChallenge { - signed_message: String, - message: String, - }, - - /// Auth solution from validator to worker - AuthSolution { signed_message: String }, - - /// Auth granted from worker to validator - AuthGranted {}, - - /// Auth rejected from validator to worker - AuthRejected {}, - - /// Simple ping message for connectivity testing - Ping { timestamp: SystemTime, nonce: u64 }, - - /// Response to ping - Pong { timestamp: SystemTime, nonce: u64 }, - - /// Hardware challenge from validator to worker - HardwareChallenge { - challenge: ChallengeRequest, - timestamp: SystemTime, - }, - - /// Hardware challenge response from worker to validator - HardwareChallengeResponse { - response: ChallengeResponse, - timestamp: SystemTime, - }, - - /// Invite request from orchestrator to worker - Invite(InviteRequest), - - /// Response to invite - InviteResponse { - status: String, - error: Option, - }, - - /// Get task logs from worker - GetTaskLogs, - - /// Response with task logs - GetTaskLogsResponse { logs: Result, String> }, - - /// Restart task on worker - RestartTask, - - /// Response to restart task - RestartTaskResponse { result: Result<(), String> }, -} - -/// P2P request wrapper with ID for tracking -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct 
P2PRequest { - pub id: String, - pub message: P2PMessage, -} - -/// P2P response wrapper with request ID -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct P2PResponse { - pub request_id: String, - pub message: P2PMessage, -} - -impl P2PRequest { - pub fn new(message: P2PMessage) -> Self { - Self { - id: uuid::Uuid::new_v4().to_string(), - message, - } - } -} - -impl P2PResponse { - pub fn new(request_id: String, message: P2PMessage) -> Self { - Self { - request_id, - message, - } - } -} diff --git a/crates/shared/src/p2p/mod.rs b/crates/shared/src/p2p/mod.rs index cac69a8a..9d0e4016 100644 --- a/crates/shared/src/p2p/mod.rs +++ b/crates/shared/src/p2p/mod.rs @@ -1,9 +1,3 @@ -pub mod client; -pub mod messages; -pub mod protocol; mod service; -pub use client::P2PClient; -pub use protocol::*; - pub use service::*; diff --git a/crates/shared/src/p2p/protocol.rs b/crates/shared/src/p2p/protocol.rs deleted file mode 100644 index 2aab189d..00000000 --- a/crates/shared/src/p2p/protocol.rs +++ /dev/null @@ -1,5 +0,0 @@ -/// Protocol ID for Prime P2P communication -pub const PRIME_P2P_PROTOCOL: &[u8] = b"prime-p2p-v1"; - -/// Timeout for P2P requests in seconds -pub const P2P_REQUEST_TIMEOUT: u64 = 30; diff --git a/crates/shared/src/p2p/service.rs b/crates/shared/src/p2p/service.rs index f5a7bbe3..064dca63 100644 --- a/crates/shared/src/p2p/service.rs +++ b/crates/shared/src/p2p/service.rs @@ -133,6 +133,7 @@ struct Context { // this assumes that there is only one outbound request per protocol per peer at a time, // is this a correct assumption? 
// response channel is for sending the response back to the caller who initiated the request + #[allow(clippy::type_complexity)] ongoing_outbound_requests: Arc>>>, diff --git a/crates/validator/src/main.rs b/crates/validator/src/main.rs index d17f5004..f3b80d4b 100644 --- a/crates/validator/src/main.rs +++ b/crates/validator/src/main.rs @@ -639,7 +639,7 @@ mod tests { web::{self, post}, HttpResponse, Scope, }; - use shared::models::challenge::{calc_matrix, ChallengeRequest, ChallengeResponse, FixedF64}; + use p2p::{calc_matrix, ChallengeRequest, ChallengeResponse, FixedF64}; async fn handle_challenge(challenge: web::Json) -> HttpResponse { let result = calc_matrix(&challenge); From 2475059bb13d2b91730cbdcdcaf7a4d003a8ad21 Mon Sep 17 00:00:00 2001 From: elizabeth Date: Thu, 10 Jul 2025 19:27:37 -0400 Subject: [PATCH 29/38] no port conflict --- crates/worker/src/cli/command.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/worker/src/cli/command.rs b/crates/worker/src/cli/command.rs index 8f358252..2fb8a739 100644 --- a/crates/worker/src/cli/command.rs +++ b/crates/worker/src/cli/command.rs @@ -128,7 +128,7 @@ pub enum Commands { #[arg(long, default_value = "false")] with_ipfs_upload: bool, - #[arg(long, default_value = "4002")] + #[arg(long, default_value = "5001")] ipfs_port: u16, }, Check {}, From 73300bea87429b21d1a0e2f3844c6df42b93d205 Mon Sep 17 00:00:00 2001 From: elizabeth Date: Thu, 10 Jul 2025 20:59:22 -0400 Subject: [PATCH 30/38] rename messages to be more correct --- crates/orchestrator/src/p2p/mod.rs | 8 ++-- crates/p2p/src/lib.rs | 17 ++++--- crates/p2p/src/message/mod.rs | 76 +++++++++++++++--------------- crates/p2p/src/protocol.rs | 16 +++---- crates/shared/src/p2p/service.rs | 24 +++++----- crates/validator/src/p2p/mod.rs | 2 +- crates/worker/src/cli/command.rs | 3 ++ crates/worker/src/p2p/mod.rs | 30 ++++++------ 8 files changed, 91 insertions(+), 85 deletions(-) diff --git a/crates/orchestrator/src/p2p/mod.rs 
b/crates/orchestrator/src/p2p/mod.rs index f3bf57cf..c11ca2bf 100644 --- a/crates/orchestrator/src/p2p/mod.rs +++ b/crates/orchestrator/src/p2p/mod.rs @@ -41,7 +41,7 @@ impl Service { .with_invite() .with_get_task_logs() .with_restart() - .with_validator_authentication(), + .with_authentication(), ) .context("failed to create p2p service")?; Ok(( @@ -120,7 +120,7 @@ impl Service { Some(request) = restart_task_rx.recv() => { let (incoming_resp_tx, incoming_resp_rx) = tokio::sync::oneshot::channel(); let fut = async move { - let p2p::Response::Restart(resp) = incoming_resp_rx.await.context("outgoing request tx channel was dropped")? else { + let p2p::Response::RestartTask(resp) = incoming_resp_rx.await.context("outgoing request tx channel was dropped")? else { bail!("unexpected response type for restart task request"); }; request.response_tx.send(resp).map_err(|_|anyhow::anyhow!("caller dropped response channel"))?; @@ -132,7 +132,7 @@ impl Service { peer_wallet_address: request.worker_wallet_address, peer_id: request.worker_p2p_id, multiaddrs: request.worker_addresses, - request: p2p::Request::Restart, + request: p2p::Request::RestartTask, response_tx: incoming_resp_tx, }; outgoing_message_tx.send(outgoing_request).await @@ -167,5 +167,5 @@ pub struct RestartTaskRequest { pub(crate) worker_wallet_address: alloy::primitives::Address, pub(crate) worker_p2p_id: String, pub(crate) worker_addresses: Vec, - pub(crate) response_tx: tokio::sync::oneshot::Sender, + pub(crate) response_tx: tokio::sync::oneshot::Sender, } diff --git a/crates/p2p/src/lib.rs b/crates/p2p/src/lib.rs index 0ad1e4a5..f5fedad3 100644 --- a/crates/p2p/src/lib.rs +++ b/crates/p2p/src/lib.rs @@ -9,6 +9,7 @@ use libp2p::SwarmBuilder; use libp2p::{identity, Transport}; use std::time::Duration; use tracing::debug; +use tracing::info; mod behaviour; mod message; @@ -134,20 +135,24 @@ impl Node { event = swarm.select_next_some() => { match event { SwarmEvent::NewListenAddr { - listener_id: _, address, + .. 
} => { debug!("new listen address: {address}"); } SwarmEvent::ExternalAddrConfirmed { address } => { debug!("external address confirmed: {address}"); } + SwarmEvent::ConnectionEstablished { + peer_id, + .. + } => { + info!("connection established with peer {peer_id}"); + } SwarmEvent::ConnectionClosed { peer_id, cause, - endpoint: _, - connection_id: _, - num_established: _, + .. } => { debug!("connection closed with peer {peer_id}: {cause:?}"); } @@ -209,8 +214,8 @@ impl NodeBuilder { self } - pub fn with_validator_authentication(mut self) -> Self { - self.protocols = self.protocols.with_validator_authentication(); + pub fn with_authentication(mut self) -> Self { + self.protocols = self.protocols.with_authentication(); self } diff --git a/crates/p2p/src/message/mod.rs b/crates/p2p/src/message/mod.rs index dc2403e3..74b09c5a 100644 --- a/crates/p2p/src/message/mod.rs +++ b/crates/p2p/src/message/mod.rs @@ -27,11 +27,11 @@ pub enum OutgoingMessage { #[derive(Debug, Clone, Serialize, Deserialize)] pub enum Request { - ValidatorAuthentication(ValidatorAuthenticationRequest), + Authentication(AuthenticationRequest), HardwareChallenge(HardwareChallengeRequest), Invite(InviteRequest), GetTaskLogs, - Restart, + RestartTask, General(GeneralRequest), } @@ -46,11 +46,11 @@ impl Request { pub fn protocol(&self) -> Protocol { match self { - Request::ValidatorAuthentication(_) => Protocol::ValidatorAuthentication, + Request::Authentication(_) => Protocol::Authentication, Request::HardwareChallenge(_) => Protocol::HardwareChallenge, Request::Invite(_) => Protocol::Invite, Request::GetTaskLogs => Protocol::GetTaskLogs, - Request::Restart => Protocol::Restart, + Request::RestartTask => Protocol::Restart, Request::General(_) => Protocol::General, } } @@ -58,11 +58,11 @@ impl Request { #[derive(Debug, Clone, Serialize, Deserialize)] pub enum Response { - ValidatorAuthentication(ValidatorAuthenticationResponse), + Authentication(AuthenticationResponse), 
HardwareChallenge(HardwareChallengeResponse), Invite(InviteResponse), GetTaskLogs(GetTaskLogsResponse), - Restart(RestartResponse), + RestartTask(RestartTaskResponse), General(GeneralResponse), } @@ -76,72 +76,72 @@ impl Response { } #[derive(Debug, Clone, Serialize, Deserialize)] -pub enum ValidatorAuthenticationRequest { - Initiation(ValidatorAuthenticationInitiationRequest), - Solution(ValidatorAuthenticationSolutionRequest), +pub enum AuthenticationRequest { + Initiation(AuthenticationInitiationRequest), + Solution(AuthenticationSolutionRequest), } -impl From for Request { - fn from(request: ValidatorAuthenticationRequest) -> Self { - Request::ValidatorAuthentication(request) +impl From for Request { + fn from(request: AuthenticationRequest) -> Self { + Request::Authentication(request) } } #[derive(Debug, Clone, Serialize, Deserialize)] -pub enum ValidatorAuthenticationResponse { - Initiation(ValidatorAuthenticationInitiationResponse), - Solution(ValidatorAuthenticationSolutionResponse), +pub enum AuthenticationResponse { + Initiation(AuthenticationInitiationResponse), + Solution(AuthenticationSolutionResponse), } -impl From for Response { - fn from(response: ValidatorAuthenticationResponse) -> Self { - Response::ValidatorAuthentication(response) +impl From for Response { + fn from(response: AuthenticationResponse) -> Self { + Response::Authentication(response) } } #[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ValidatorAuthenticationInitiationRequest { +pub struct AuthenticationInitiationRequest { pub message: String, } -impl From for Request { - fn from(request: ValidatorAuthenticationInitiationRequest) -> Self { - Request::ValidatorAuthentication(ValidatorAuthenticationRequest::Initiation(request)) +impl From for Request { + fn from(request: AuthenticationInitiationRequest) -> Self { + Request::Authentication(AuthenticationRequest::Initiation(request)) } } #[derive(Debug, Clone, Serialize, Deserialize)] -pub struct 
ValidatorAuthenticationInitiationResponse { +pub struct AuthenticationInitiationResponse { pub signature: String, pub message: String, } -impl From for Response { - fn from(response: ValidatorAuthenticationInitiationResponse) -> Self { - Response::ValidatorAuthentication(ValidatorAuthenticationResponse::Initiation(response)) +impl From for Response { + fn from(response: AuthenticationInitiationResponse) -> Self { + Response::Authentication(AuthenticationResponse::Initiation(response)) } } #[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ValidatorAuthenticationSolutionRequest { +pub struct AuthenticationSolutionRequest { pub signature: String, } -impl From for Request { - fn from(request: ValidatorAuthenticationSolutionRequest) -> Self { - Request::ValidatorAuthentication(ValidatorAuthenticationRequest::Solution(request)) +impl From for Request { + fn from(request: AuthenticationSolutionRequest) -> Self { + Request::Authentication(AuthenticationRequest::Solution(request)) } } #[derive(Debug, Clone, Serialize, Deserialize)] -pub enum ValidatorAuthenticationSolutionResponse { +pub enum AuthenticationSolutionResponse { Granted, Rejected, } -impl From for Response { - fn from(response: ValidatorAuthenticationSolutionResponse) -> Self { - Response::ValidatorAuthentication(ValidatorAuthenticationResponse::Solution(response)) +impl From for Response { + fn from(response: AuthenticationSolutionResponse) -> Self { + Response::Authentication(AuthenticationResponse::Solution(response)) } } @@ -216,14 +216,14 @@ impl From for Response { } #[derive(Debug, Clone, Serialize, Deserialize)] -pub enum RestartResponse { +pub enum RestartTaskResponse { Ok, Error(String), } -impl From for Response { - fn from(response: RestartResponse) -> Self { - Response::Restart(response) +impl From for Response { + fn from(response: RestartTaskResponse) -> Self { + Response::RestartTask(response) } } diff --git a/crates/p2p/src/protocol.rs b/crates/p2p/src/protocol.rs index 
0956ef0f..f721bea6 100644 --- a/crates/p2p/src/protocol.rs +++ b/crates/p2p/src/protocol.rs @@ -3,8 +3,8 @@ use std::{collections::HashSet, hash::Hash}; #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum Protocol { - // validator -> worker - ValidatorAuthentication, + // validator or orchestrator -> worker + Authentication, // validator -> worker HardwareChallenge, // orchestrator -> worker @@ -20,9 +20,7 @@ pub enum Protocol { impl Protocol { pub(crate) fn as_stream_protocol(&self) -> StreamProtocol { match self { - Protocol::ValidatorAuthentication => { - StreamProtocol::new("/prime/validator_authentication/1.0.0") - } + Protocol::Authentication => StreamProtocol::new("/prime/authentication/1.0.0"), Protocol::HardwareChallenge => StreamProtocol::new("/prime/hardware_challenge/1.0.0"), Protocol::Invite => StreamProtocol::new("/prime/invite/1.0.0"), Protocol::GetTaskLogs => StreamProtocol::new("/prime/get_task_logs/1.0.0"), @@ -46,8 +44,8 @@ impl Protocols { Self(HashSet::new()) } - pub fn has_validator_authentication(&self) -> bool { - self.0.contains(&Protocol::ValidatorAuthentication) + pub fn has_authentication(&self) -> bool { + self.0.contains(&Protocol::Authentication) } pub fn has_hardware_challenge(&self) -> bool { @@ -70,8 +68,8 @@ impl Protocols { self.0.contains(&Protocol::General) } - pub fn with_validator_authentication(mut self) -> Self { - self.0.insert(Protocol::ValidatorAuthentication); + pub fn with_authentication(mut self) -> Self { + self.0.insert(Protocol::Authentication); self } diff --git a/crates/shared/src/p2p/service.rs b/crates/shared/src/p2p/service.rs index 064dca63..bd817ea1 100644 --- a/crates/shared/src/p2p/service.rs +++ b/crates/shared/src/p2p/service.rs @@ -2,9 +2,9 @@ use crate::web3::wallet::Wallet; use anyhow::{bail, Context as _, Result}; use futures::stream::FuturesUnordered; use p2p::{ + AuthenticationInitiationRequest, AuthenticationResponse, AuthenticationSolutionRequest, IncomingMessage, Libp2pIncomingMessage, Node, 
NodeBuilder, OutgoingMessage, PeerId, Protocol, - Protocols, Response, ValidatorAuthenticationInitiationRequest, ValidatorAuthenticationResponse, - ValidatorAuthenticationSolutionRequest, + Protocols, Response, }; use std::collections::HashMap; use std::collections::HashSet; @@ -115,7 +115,7 @@ fn build_p2p_node( NodeBuilder::new() .with_keypair(keypair) .with_port(port) - .with_validator_authentication() + .with_authentication() .with_protocols(protocols) .with_cancellation_token(cancellation_token) .try_build() @@ -233,7 +233,7 @@ async fn handle_outgoing_message( let challenge_bytes: [u8; 32] = OsRng.gen(); let auth_challenge_message: String = hex::encode(challenge_bytes); - let req: p2p::Request = ValidatorAuthenticationInitiationRequest { + let req: p2p::Request = AuthenticationInitiationRequest { message: auth_challenge_message.clone(), } .into(); @@ -288,7 +288,7 @@ async fn handle_incoming_response( context: Context, ) -> Result<()> { match response { - p2p::Response::ValidatorAuthentication(resp) => { + p2p::Response::Authentication(resp) => { log::debug!("received ValidatorAuthenticationSolutionResponse from {from}: {resp:?}"); handle_validation_authentication_response(from, resp, context) .await @@ -337,7 +337,7 @@ async fn handle_incoming_response( }; let _ = response_tx.send(response); } - p2p::Response::Restart(ref resp) => { + p2p::Response::RestartTask(ref resp) => { if !context.protocols.has_restart() { bail!("received RestartResponse from {from}, but restart protocol is not enabled"); } @@ -370,14 +370,14 @@ async fn handle_incoming_response( async fn handle_validation_authentication_response( from: PeerId, - response: p2p::ValidatorAuthenticationResponse, + response: p2p::AuthenticationResponse, context: Context, ) -> Result<()> { use crate::security::request_signer::sign_message; use std::str::FromStr as _; match response { - ValidatorAuthenticationResponse::Initiation(req) => { + AuthenticationResponse::Initiation(req) => { let 
ongoing_auth_requests = context.ongoing_auth_requests.read().await; let Some(ongoing_challenge) = ongoing_auth_requests.get(&from) else { bail!( @@ -408,7 +408,7 @@ async fn handle_validation_authentication_response( log::debug!("auth challenge initiation response received from node: {from}"); let signature = sign_message(&req.message, &context.wallet).await.unwrap(); - let req: p2p::Request = ValidatorAuthenticationSolutionRequest { signature }.into(); + let req: p2p::Request = AuthenticationSolutionRequest { signature }.into(); let req = req.into_outgoing_message(from, vec![]); context .outgoing_messages @@ -416,7 +416,7 @@ async fn handle_validation_authentication_response( .await .context("failed to send outgoing message")?; } - ValidatorAuthenticationResponse::Solution(req) => { + AuthenticationResponse::Solution(req) => { let mut ongoing_auth_requests = context.ongoing_auth_requests.write().await; let Some(ongoing_challenge) = ongoing_auth_requests.remove(&from) else { bail!( @@ -425,8 +425,8 @@ async fn handle_validation_authentication_response( }; match req { - p2p::ValidatorAuthenticationSolutionResponse::Granted => {} - p2p::ValidatorAuthenticationSolutionResponse::Rejected => { + p2p::AuthenticationSolutionResponse::Granted => {} + p2p::AuthenticationSolutionResponse::Rejected => { log::debug!("auth challenge rejected by node: {from}"); return Ok(()); } diff --git a/crates/validator/src/p2p/mod.rs b/crates/validator/src/p2p/mod.rs index dc6b23e6..24811586 100644 --- a/crates/validator/src/p2p/mod.rs +++ b/crates/validator/src/p2p/mod.rs @@ -30,7 +30,7 @@ impl Service { wallet, Protocols::new() .with_hardware_challenge() - .with_validator_authentication(), + .with_authentication(), ) .context("failed to create p2p service")?; Ok(( diff --git a/crates/worker/src/cli/command.rs b/crates/worker/src/cli/command.rs index 2fb8a739..2ff06e8d 100644 --- a/crates/worker/src/cli/command.rs +++ b/crates/worker/src/cli/command.rs @@ -746,11 +746,14 @@ pub async fn 
execute_command( let peer_id = p2p_service.peer_id(); node_config.worker_p2p_id = Some(peer_id.to_string()); + let external_p2p_address = + format!("/ip4/{}/tcp/{}", node_config.ip_address, *libp2p_port,); node_config.worker_p2p_addresses = Some( p2p_service .listen_addrs() .iter() .map(|addr| addr.to_string()) + .chain(std::iter::once(external_p2p_address)) .collect(), ); tokio::task::spawn(p2p_service.run()); diff --git a/crates/worker/src/p2p/mod.rs b/crates/worker/src/p2p/mod.rs index 748d1d54..61b682c5 100644 --- a/crates/worker/src/p2p/mod.rs +++ b/crates/worker/src/p2p/mod.rs @@ -113,7 +113,7 @@ fn build_p2p_node( let (node, _, incoming_message_rx, outgoing_message_tx) = NodeBuilder::new() .with_keypair(keypair) .with_port(port) - .with_validator_authentication() + .with_authentication() .with_hardware_challenge() .with_invite() .with_get_task_logs() @@ -201,24 +201,24 @@ async fn handle_incoming_request( context: Context, ) -> Result<()> { let resp = match request { - p2p::Request::ValidatorAuthentication(req) => { + p2p::Request::Authentication(req) => { tracing::debug!("handling ValidatorAuthentication request"); match req { - p2p::ValidatorAuthenticationRequest::Initiation(req) => { + p2p::AuthenticationRequest::Initiation(req) => { handle_validator_authentication_initiation_request(from, req, &context) .await .context("failed to handle ValidatorAuthenticationInitiationRequest")? 
} - p2p::ValidatorAuthenticationRequest::Solution(req) => { + p2p::AuthenticationRequest::Solution(req) => { match handle_validator_authentication_solution_request(from, req, &context) .await { - Ok(()) => p2p::ValidatorAuthenticationSolutionResponse::Granted.into(), + Ok(()) => p2p::AuthenticationSolutionResponse::Granted.into(), Err(e) => { tracing::error!( "failed to handle ValidatorAuthenticationSolutionRequest: {e:?}" ); - p2p::ValidatorAuthenticationSolutionResponse::Rejected.into() + p2p::AuthenticationSolutionResponse::Rejected.into() } } } @@ -241,7 +241,7 @@ async fn handle_incoming_request( tracing::debug!("handling GetTaskLogs request"); handle_get_task_logs_request(from, &context).await } - p2p::Request::Restart => { + p2p::Request::RestartTask => { tracing::debug!("handling Restart request"); handle_restart_request(from, &context).await } @@ -262,7 +262,7 @@ async fn handle_incoming_request( async fn handle_validator_authentication_initiation_request( from: PeerId, - req: p2p::ValidatorAuthenticationInitiationRequest, + req: p2p::AuthenticationInitiationRequest, context: &Context, ) -> Result { use rand_v8::Rng as _; @@ -283,7 +283,7 @@ async fn handle_validator_authentication_initiation_request( let mut ongoing_auth_challenges = context.ongoing_auth_challenges.write().await; ongoing_auth_challenges.insert(from, challenge_message.clone()); - Ok(p2p::ValidatorAuthenticationInitiationResponse { + Ok(p2p::AuthenticationInitiationResponse { message: challenge_message, signature, } @@ -292,7 +292,7 @@ async fn handle_validator_authentication_initiation_request( async fn handle_validator_authentication_solution_request( from: PeerId, - req: p2p::ValidatorAuthenticationSolutionRequest, + req: p2p::AuthenticationSolutionRequest, context: &Context, ) -> Result<()> { use std::str::FromStr as _; @@ -358,19 +358,19 @@ async fn handle_get_task_logs_request(from: PeerId, context: &Context) -> Respon async fn handle_restart_request(from: PeerId, context: &Context) 
-> Response { let authorized_peers = context.authorized_peers.read().await; if !authorized_peers.contains(&from) { - return p2p::RestartResponse::Error("unauthorized".to_string()).into(); + return p2p::RestartTaskResponse::Error("unauthorized".to_string()).into(); } match context.docker_service.restart_task().await { - Ok(()) => p2p::RestartResponse::Ok.into(), - Err(e) => p2p::RestartResponse::Error(format!("failed to restart task: {e:?}")).into(), + Ok(()) => p2p::RestartTaskResponse::Ok.into(), + Err(e) => p2p::RestartTaskResponse::Error(format!("failed to restart task: {e:?}")).into(), } } fn handle_incoming_response(response: p2p::Response) { // critical developer error if any of these happen, could panic here match response { - p2p::Response::ValidatorAuthentication(_) => { + p2p::Response::Authentication(_) => { tracing::error!("worker should never receive ValidatorAuthentication responses"); } p2p::Response::HardwareChallenge(_) => { @@ -382,7 +382,7 @@ fn handle_incoming_response(response: p2p::Response) { p2p::Response::GetTaskLogs(_) => { tracing::error!("worker should never receive GetTaskLogs responses"); } - p2p::Response::Restart(_) => { + p2p::Response::RestartTask(_) => { tracing::error!("worker should never receive Restart responses"); } p2p::Response::General(_) => { From e135ad463f7eed53146f4293d5a18d3f08dea096 Mon Sep 17 00:00:00 2001 From: elizabeth Date: Thu, 10 Jul 2025 23:18:34 -0400 Subject: [PATCH 31/38] add logging --- Cargo.lock | 1 + crates/p2p/Cargo.toml | 1 + crates/p2p/src/behaviour.rs | 9 +++--- crates/p2p/src/lib.rs | 28 +++++++++++++++++- crates/shared/src/p2p/service.rs | 29 ++++++++++++------- crates/validator/src/p2p/mod.rs | 1 + .../src/validators/hardware_challenge.rs | 9 ++++++ crates/worker/src/cli/command.rs | 4 +-- crates/worker/src/p2p/mod.rs | 4 ++- crates/worker/src/utils/logging.rs | 4 --- 10 files changed, 67 insertions(+), 23 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index aa99f00e..67fc79bd 100644 --- 
a/Cargo.lock +++ b/Cargo.lock @@ -6216,6 +6216,7 @@ version = "0.3.11" dependencies = [ "anyhow", "libp2p", + "log", "nalgebra", "serde", "tokio", diff --git a/crates/p2p/Cargo.toml b/crates/p2p/Cargo.toml index bb670107..498fbd29 100644 --- a/crates/p2p/Cargo.toml +++ b/crates/p2p/Cargo.toml @@ -13,6 +13,7 @@ serde = {workspace = true} tokio = {workspace = true, features = ["sync"]} tokio-util = { workspace = true, features = ["rt"] } tracing = { workspace = true } +log = { workspace = true } [lints] workspace = true diff --git a/crates/p2p/src/behaviour.rs b/crates/p2p/src/behaviour.rs index b114b61e..9d92be15 100644 --- a/crates/p2p/src/behaviour.rs +++ b/crates/p2p/src/behaviour.rs @@ -6,7 +6,7 @@ use libp2p::connection_limits::ConnectionLimits; use libp2p::identify; use libp2p::identity; use libp2p::kad; -use libp2p::kad::store::MemoryStore; +// use libp2p::kad::store::MemoryStore; use libp2p::mdns; use libp2p::ping; use libp2p::request_response; @@ -27,7 +27,8 @@ pub(crate) struct Behaviour { // discovery mdns: mdns::tokio::Behaviour, - kademlia: kad::Behaviour, + // comment out kademlia for now as it requires bootnodes to be provided + // kademlia: kad::Behaviour, // protocols identify: identify::Behaviour, @@ -113,7 +114,7 @@ impl Behaviour { let mdns = mdns::tokio::Behaviour::new(mdns::Config::default(), peer_id) .context("failed to create mDNS behaviour")?; - let kademlia = kad::Behaviour::new(peer_id, MemoryStore::new(peer_id)); + // let kademlia = kad::Behaviour::new(peer_id, MemoryStore::new(peer_id)); let identify = identify::Behaviour::new( identify::Config::new(PRIME_STREAM_PROTOCOL.to_string(), keypair.public()) @@ -124,7 +125,7 @@ impl Behaviour { Ok(Self { autonat, connection_limits, - kademlia, + // kademlia, mdns, identify, ping, diff --git a/crates/p2p/src/lib.rs b/crates/p2p/src/lib.rs index f5fedad3..896698c8 100644 --- a/crates/p2p/src/lib.rs +++ b/crates/p2p/src/lib.rs @@ -102,13 +102,15 @@ impl Node { loop { tokio::select! 
{ + biased; _ = cancellation_token.cancelled() => { debug!("cancellation token triggered, shutting down node"); break Ok(()); } Some((addrs, res_tx)) = dial_rx.recv() => { + log::info!("dialing addresses: {addrs:?}"); let mut res = Ok(()); - for addr in addrs { + for addr in &addrs { match swarm.dial(addr.clone()) { Ok(_) => {} Err(e) => { @@ -117,15 +119,18 @@ impl Node { } } } + log::info!("finished dialing addresses: {addrs:?}"); let _ = res_tx.send(res); } Some(message) = outgoing_message_rx.recv() => { match message { OutgoingMessage::Request((peer, _addrs, request)) => { // TODO: if we're not connected to the peer, we should dial it + log::info!("sending request to peer {peer}: {request:?}"); swarm.behaviour_mut().request_response().send_request(&peer, request); } OutgoingMessage::Response((channel, response)) => { + log::info!("sending response: {response:?}"); if let Err(e) = swarm.behaviour_mut().request_response().send_response(channel, response) { debug!("failed to send response: {e:?}"); } @@ -359,6 +364,27 @@ mod test { use super::NodeBuilder; use crate::message; + #[tokio::test] + async fn can_dial() { + let (node1, _, _, _) = NodeBuilder::new().with_port(4002).try_build().unwrap(); + let node1_peer_id = node1.peer_id(); + let local_p2p_address: crate::Multiaddr = + format!("/ip4/127.0.0.1/tcp/4002/p2p/{}", node1_peer_id) + .parse() + .expect("can parse valid multiaddr"); + let (node2, dial_tx2, _, _) = NodeBuilder::new().try_build().unwrap(); + tokio::spawn(async move { node1.run().await }); + tokio::spawn(async move { node2.run().await }); + + let (res_tx, res_rx) = tokio::sync::oneshot::channel(); + dial_tx2 + .send((vec![local_p2p_address], res_tx)) + .await + .expect("can send dial request"); + let res = res_rx.await.expect("can receive dial response"); + assert!(res.is_ok(), "dialing node1 should succeed: {res:?}"); + } + #[tokio::test] async fn two_nodes_can_connect_and_do_request_response() { let (node1, _, mut incoming_message_rx1, 
outgoing_message_tx1) = diff --git a/crates/shared/src/p2p/service.rs b/crates/shared/src/p2p/service.rs index bd817ea1..f0d504ca 100644 --- a/crates/shared/src/p2p/service.rs +++ b/crates/shared/src/p2p/service.rs @@ -25,7 +25,7 @@ pub struct OutgoingRequest { /// It handles the authentication protocol used before sending /// requests to the worker. pub struct Service { - _node: Node, + node: Node, dial_tx: p2p::DialSender, incoming_messages_rx: Receiver, outgoing_messages_rx: Receiver, @@ -48,7 +48,7 @@ impl Service { Ok(( Self { - _node: node, + node, dial_tx, incoming_messages_rx, outgoing_messages_rx, @@ -63,15 +63,17 @@ impl Service { use futures::StreamExt as _; let Self { - _node, + node, dial_tx, mut incoming_messages_rx, mut outgoing_messages_rx, cancellation_token, context, } = self; + tokio::task::spawn(node.run()); - let mut message_handlers = FuturesUnordered::new(); + let mut incoming_message_handlers = FuturesUnordered::new(); + let mut outgoing_message_handlers = FuturesUnordered::new(); loop { tokio::select! 
{ @@ -79,23 +81,26 @@ impl Service { break; } Some(message) = outgoing_messages_rx.recv() => { - if let Err(e) = handle_outgoing_message(message, dial_tx.clone(), context.clone()) - .await { - log::error!("failed to handle outgoing message: {e}"); - } + let handle = tokio::task::spawn(handle_outgoing_message(message, dial_tx.clone(), context.clone())); + outgoing_message_handlers.push(handle); } Some(message) = incoming_messages_rx.recv() => { let context = context.clone(); let handle = tokio::task::spawn( handle_incoming_message(message, context) ); - message_handlers.push(handle); + incoming_message_handlers.push(handle); } - Some(res) = message_handlers.next() => { + Some(res) = incoming_message_handlers.next() => { if let Err(e) = res { log::error!("failed to handle incoming message: {e}"); } } + Some(res) = outgoing_message_handlers.next() => { + if let Err(e) = res { + log::error!("failed to handle outgoing message: {e}"); + } + } } } } @@ -200,7 +205,7 @@ async fn handle_outgoing_message( return Ok(()); } - log::debug!("sending validation authentication request to {peer_id}"); + log::info!("sending validation authentication request to {peer_id}"); // first, dial the worker // ensure there's no ongoing challenge @@ -224,10 +229,12 @@ async fn handle_outgoing_message( .send((multiaddrs.clone(), res_tx)) .await .context("failed to send dial request")?; + log::info!("dialing worker {peer_id} with multiaddrs: {multiaddrs:?}"); res_rx .await .context("failed to receive dial response")? .context("failed to dial worker")?; + log::info!("dialed worker {peer_id} with multiaddrs: {multiaddrs:?}"); // create the authentication challenge request message let challenge_bytes: [u8; 32] = OsRng.gen(); diff --git a/crates/validator/src/p2p/mod.rs b/crates/validator/src/p2p/mod.rs index 24811586..6fa8fac7 100644 --- a/crates/validator/src/p2p/mod.rs +++ b/crates/validator/src/p2p/mod.rs @@ -59,6 +59,7 @@ impl Service { loop { tokio::select! 
{ Some(request) = hardware_challenge_rx.recv() => { + println!("p2p: got hardware challenge"); let (incoming_resp_tx, incoming_resp_rx) = tokio::sync::oneshot::channel(); let fut = async move { let resp = match incoming_resp_rx.await.context("outgoing request tx channel was dropped")? { diff --git a/crates/validator/src/validators/hardware_challenge.rs b/crates/validator/src/validators/hardware_challenge.rs index 6970355d..5580096e 100644 --- a/crates/validator/src/validators/hardware_challenge.rs +++ b/crates/validator/src/validators/hardware_challenge.rs @@ -30,6 +30,11 @@ impl HardwareChallenge { .clone() .ok_or_else(|| anyhow::anyhow!("Node {} does not have P2P addresses", node.id))?; + println!( + "Challenging node {} with P2P ID: {} and addresses: {:?}", + node.id, p2p_id, p2p_addresses + ); + // create random challenge matrix let challenge_matrix = self.random_challenge(3, 3, 3, 3); let challenge_expected = p2p::calc_matrix(&challenge_matrix); @@ -60,10 +65,14 @@ impl HardwareChallenge { .await .context("failed to send hardware challenge request to p2p service")?; + println!("hardware challenge sent to node {}", node.id); + let resp = response_rx .await .context("failed to receive response from node")?; + println!("response received from node {}: {:?}", node.id, resp); + if challenge_expected.result == resp.result { info!("Challenge for node {} successful", node.id); } else { diff --git a/crates/worker/src/cli/command.rs b/crates/worker/src/cli/command.rs index 2ff06e8d..539de1ae 100644 --- a/crates/worker/src/cli/command.rs +++ b/crates/worker/src/cli/command.rs @@ -747,13 +747,13 @@ pub async fn execute_command( let peer_id = p2p_service.peer_id(); node_config.worker_p2p_id = Some(peer_id.to_string()); let external_p2p_address = - format!("/ip4/{}/tcp/{}", node_config.ip_address, *libp2p_port,); + format!("/ip4/{}/tcp/{}", node_config.ip_address, *libp2p_port); node_config.worker_p2p_addresses = Some( p2p_service .listen_addrs() .iter() .map(|addr| 
addr.to_string()) - .chain(std::iter::once(external_p2p_address)) + .chain(vec![external_p2p_address]) .collect(), ); tokio::task::spawn(p2p_service.run()); diff --git a/crates/worker/src/p2p/mod.rs b/crates/worker/src/p2p/mod.rs index 61b682c5..c8464a7a 100644 --- a/crates/worker/src/p2p/mod.rs +++ b/crates/worker/src/p2p/mod.rs @@ -75,12 +75,14 @@ impl Service { use futures::StreamExt as _; let Self { - node: _, + node, mut incoming_messages, cancellation_token, context, } = self; + tokio::task::spawn(node.run()); + let mut message_handlers = FuturesUnordered::new(); loop { diff --git a/crates/worker/src/utils/logging.rs b/crates/worker/src/utils/logging.rs index 18c8de4b..312d565c 100644 --- a/crates/worker/src/utils/logging.rs +++ b/crates/worker/src/utils/logging.rs @@ -75,10 +75,6 @@ pub fn setup_logging(cli: Option<&Cli>) -> Result<(), Box Date: Fri, 11 Jul 2025 11:09:23 -0400 Subject: [PATCH 32/38] fix tests --- crates/orchestrator/src/api/tests/helper.rs | 12 ++++++------ .../validator/src/validators/hardware_challenge.rs | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/crates/orchestrator/src/api/tests/helper.rs b/crates/orchestrator/src/api/tests/helper.rs index 92b26cce..f4204262 100644 --- a/crates/orchestrator/src/api/tests/helper.rs +++ b/crates/orchestrator/src/api/tests/helper.rs @@ -46,8 +46,8 @@ pub(crate) async fn create_test_app_state() -> Data { let mock_storage = MockStorageProvider::new(); let storage_provider = Arc::new(mock_storage); let metrics = Arc::new(MetricsContext::new(1.to_string())); - let (get_task_logs_tx, _) = tokio::sync::mpsc::channel(0); - let (restart_task_tx, _) = tokio::sync::mpsc::channel(0); + let (get_task_logs_tx, _) = tokio::sync::mpsc::channel(1); + let (restart_task_tx, _) = tokio::sync::mpsc::channel(1); Data::new(AppState { store_context: store_context.clone(), @@ -112,8 +112,8 @@ pub(crate) async fn create_test_app_state_with_nodegroups() -> Data { let mock_storage = 
MockStorageProvider::new(); let storage_provider = Arc::new(mock_storage); let metrics = Arc::new(MetricsContext::new(1.to_string())); - let (get_task_logs_tx, _) = tokio::sync::mpsc::channel(0); - let (restart_task_tx, _) = tokio::sync::mpsc::channel(0); + let (get_task_logs_tx, _) = tokio::sync::mpsc::channel(1); + let (restart_task_tx, _) = tokio::sync::mpsc::channel(1); Data::new(AppState { store_context: store_context.clone(), @@ -175,8 +175,8 @@ pub(crate) async fn create_test_app_state_with_metrics() -> Data { let mock_storage = MockStorageProvider::new(); let storage_provider = Arc::new(mock_storage); let metrics = Arc::new(MetricsContext::new("0".to_string())); - let (get_task_logs_tx, _) = tokio::sync::mpsc::channel(0); - let (restart_task_tx, _) = tokio::sync::mpsc::channel(0); + let (get_task_logs_tx, _) = tokio::sync::mpsc::channel(1); + let (restart_task_tx, _) = tokio::sync::mpsc::channel(1); Data::new(AppState { store_context: store_context.clone(), diff --git a/crates/validator/src/validators/hardware_challenge.rs b/crates/validator/src/validators/hardware_challenge.rs index 5580096e..6d8df988 100644 --- a/crates/validator/src/validators/hardware_challenge.rs +++ b/crates/validator/src/validators/hardware_challenge.rs @@ -65,13 +65,13 @@ impl HardwareChallenge { .await .context("failed to send hardware challenge request to p2p service")?; - println!("hardware challenge sent to node {}", node.id); + info!("hardware challenge sent to node {}", node.id); let resp = response_rx .await .context("failed to receive response from node")?; - println!("response received from node {}: {:?}", node.id, resp); + info!("response received from node {}: {:?}", node.id, resp); if challenge_expected.result == resp.result { info!("Challenge for node {} successful", node.id); From 4798692eb16acfc738f7433c7e67102d6eb1fc89 Mon Sep 17 00:00:00 2001 From: elizabeth Date: Fri, 11 Jul 2025 11:32:19 -0400 Subject: [PATCH 33/38] remove explicit dialing, messaging now working 
--- crates/p2p/src/behaviour.rs | 3 +- crates/p2p/src/lib.rs | 62 +++---------------- crates/shared/src/p2p/service.rs | 40 +++--------- .../src/validators/hardware_challenge.rs | 4 -- crates/worker/src/p2p/mod.rs | 2 +- 5 files changed, 18 insertions(+), 93 deletions(-) diff --git a/crates/p2p/src/behaviour.rs b/crates/p2p/src/behaviour.rs index 9d92be15..399693b5 100644 --- a/crates/p2p/src/behaviour.rs +++ b/crates/p2p/src/behaviour.rs @@ -11,8 +11,8 @@ use libp2p::mdns; use libp2p::ping; use libp2p::request_response; use libp2p::swarm::NetworkBehaviour; +use log::debug; use std::time::Duration; -use tracing::debug; use crate::message::IncomingMessage; use crate::message::{Request, Response}; @@ -155,6 +155,7 @@ impl BehaviourEvent { BehaviourEvent::RequestResponse(event) => match event { request_response::Event::Message { peer, message } => { debug!("received message from peer {peer:?}: {message:?}"); + // if this errors, user dropped their incoming message channel let _ = message_tx.send(IncomingMessage { peer, message }).await; } diff --git a/crates/p2p/src/lib.rs b/crates/p2p/src/lib.rs index 896698c8..f5bc648c 100644 --- a/crates/p2p/src/lib.rs +++ b/crates/p2p/src/lib.rs @@ -7,9 +7,8 @@ use libp2p::yamux; use libp2p::Swarm; use libp2p::SwarmBuilder; use libp2p::{identity, Transport}; +use log::debug; use std::time::Duration; -use tracing::debug; -use tracing::info; mod behaviour; mod message; @@ -25,8 +24,6 @@ pub type ResponseChannel = libp2p::request_response::ResponseChannel; pub type PeerId = libp2p::PeerId; pub type Multiaddr = libp2p::Multiaddr; pub type Keypair = libp2p::identity::Keypair; -pub type DialSender = - tokio::sync::mpsc::Sender<(Vec, tokio::sync::oneshot::Sender>)>; pub const PRIME_STREAM_PROTOCOL: libp2p::StreamProtocol = libp2p::StreamProtocol::new("/prime/1.0.0"); @@ -40,9 +37,6 @@ pub struct Node { bootnodes: Vec, cancellation_token: tokio_util::sync::CancellationToken, - dial_rx: - tokio::sync::mpsc::Receiver<(Vec, 
tokio::sync::oneshot::Sender>)>, - // channel for sending incoming messages to the consumer of this library incoming_message_tx: tokio::sync::mpsc::Sender, @@ -80,7 +74,6 @@ impl Node { mut swarm, bootnodes, cancellation_token, - mut dial_rx, incoming_message_tx, mut outgoing_message_rx, } = self; @@ -107,30 +100,16 @@ impl Node { debug!("cancellation token triggered, shutting down node"); break Ok(()); } - Some((addrs, res_tx)) = dial_rx.recv() => { - log::info!("dialing addresses: {addrs:?}"); - let mut res = Ok(()); - for addr in &addrs { - match swarm.dial(addr.clone()) { - Ok(_) => {} - Err(e) => { - res = Err(anyhow::anyhow!("failed to dial {addr}: {e:?}")); - break; - } - } - } - log::info!("finished dialing addresses: {addrs:?}"); - let _ = res_tx.send(res); - } Some(message) = outgoing_message_rx.recv() => { match message { - OutgoingMessage::Request((peer, _addrs, request)) => { + OutgoingMessage::Request((peer, addrs, request)) => { // TODO: if we're not connected to the peer, we should dial it - log::info!("sending request to peer {peer}: {request:?}"); + for addr in addrs { + swarm.add_peer_address(peer, addr); + } swarm.behaviour_mut().request_response().send_request(&peer, request); } OutgoingMessage::Response((channel, response)) => { - log::info!("sending response: {response:?}"); if let Err(e) = swarm.behaviour_mut().request_response().send_response(channel, response) { debug!("failed to send response: {e:?}"); } @@ -152,7 +131,7 @@ impl Node { peer_id, .. 
} => { - info!("connection established with peer {peer_id}"); + debug!("connection established with peer {peer_id}"); } SwarmEvent::ConnectionClosed { peer_id, @@ -282,7 +261,6 @@ impl NodeBuilder { self, ) -> Result<( Node, - DialSender, tokio::sync::mpsc::Receiver, tokio::sync::mpsc::Sender, )> { @@ -324,7 +302,6 @@ impl NodeBuilder { listen_addrs.push(listen_addr); } - let (dial_tx, dial_rx) = tokio::sync::mpsc::channel(100); let (incoming_message_tx, incoming_message_rx) = tokio::sync::mpsc::channel(100); let (outgoing_message_tx, outgoing_message_rx) = tokio::sync::mpsc::channel(100); @@ -334,12 +311,10 @@ impl NodeBuilder { swarm, listen_addrs, bootnodes, - dial_rx, incoming_message_tx, outgoing_message_rx, cancellation_token: cancellation_token.unwrap_or_default(), }, - dial_tx, incoming_message_rx, outgoing_message_tx, )) @@ -364,34 +339,13 @@ mod test { use super::NodeBuilder; use crate::message; - #[tokio::test] - async fn can_dial() { - let (node1, _, _, _) = NodeBuilder::new().with_port(4002).try_build().unwrap(); - let node1_peer_id = node1.peer_id(); - let local_p2p_address: crate::Multiaddr = - format!("/ip4/127.0.0.1/tcp/4002/p2p/{}", node1_peer_id) - .parse() - .expect("can parse valid multiaddr"); - let (node2, dial_tx2, _, _) = NodeBuilder::new().try_build().unwrap(); - tokio::spawn(async move { node1.run().await }); - tokio::spawn(async move { node2.run().await }); - - let (res_tx, res_rx) = tokio::sync::oneshot::channel(); - dial_tx2 - .send((vec![local_p2p_address], res_tx)) - .await - .expect("can send dial request"); - let res = res_rx.await.expect("can receive dial response"); - assert!(res.is_ok(), "dialing node1 should succeed: {res:?}"); - } - #[tokio::test] async fn two_nodes_can_connect_and_do_request_response() { - let (node1, _, mut incoming_message_rx1, outgoing_message_tx1) = + let (node1, mut incoming_message_rx1, outgoing_message_tx1) = NodeBuilder::new().with_get_task_logs().try_build().unwrap(); let node1_peer_id = 
node1.peer_id(); - let (node2, _, mut incoming_message_rx2, outgoing_message_tx2) = NodeBuilder::new() + let (node2, mut incoming_message_rx2, outgoing_message_tx2) = NodeBuilder::new() .with_get_task_logs() .with_bootnodes(node1.multiaddrs()) .try_build() diff --git a/crates/shared/src/p2p/service.rs b/crates/shared/src/p2p/service.rs index f0d504ca..9223bc3d 100644 --- a/crates/shared/src/p2p/service.rs +++ b/crates/shared/src/p2p/service.rs @@ -26,7 +26,6 @@ pub struct OutgoingRequest { /// requests to the worker. pub struct Service { node: Node, - dial_tx: p2p::DialSender, incoming_messages_rx: Receiver, outgoing_messages_rx: Receiver, cancellation_token: CancellationToken, @@ -41,7 +40,7 @@ impl Service { wallet: Wallet, protocols: Protocols, ) -> Result<(Self, Sender)> { - let (node, dial_tx, incoming_messages_rx, outgoing_messages) = + let (node, incoming_messages_rx, outgoing_messages) = build_p2p_node(keypair, port, cancellation_token.clone(), protocols.clone()) .context("failed to build p2p node")?; let (outgoing_messages_tx, outgoing_messages_rx) = tokio::sync::mpsc::channel(100); @@ -49,7 +48,6 @@ impl Service { Ok(( Self { node, - dial_tx, incoming_messages_rx, outgoing_messages_rx, cancellation_token, @@ -64,7 +62,6 @@ impl Service { let Self { node, - dial_tx, mut incoming_messages_rx, mut outgoing_messages_rx, cancellation_token, @@ -81,7 +78,7 @@ impl Service { break; } Some(message) = outgoing_messages_rx.recv() => { - let handle = tokio::task::spawn(handle_outgoing_message(message, dial_tx.clone(), context.clone())); + let handle = tokio::task::spawn(handle_outgoing_message(message, context.clone())); outgoing_message_handlers.push(handle); } Some(message) = incoming_messages_rx.recv() => { @@ -111,12 +108,7 @@ fn build_p2p_node( port: u16, cancellation_token: CancellationToken, protocols: Protocols, -) -> Result<( - Node, - p2p::DialSender, - Receiver, - Sender, -)> { +) -> Result<(Node, Receiver, Sender)> { NodeBuilder::new() 
.with_keypair(keypair) .with_port(port) @@ -171,11 +163,7 @@ impl Context { } } -async fn handle_outgoing_message( - message: OutgoingRequest, - dial_tx: p2p::DialSender, - context: Context, -) -> Result<()> { +async fn handle_outgoing_message(message: OutgoingRequest, context: Context) -> Result<()> { use rand_v8::rngs::OsRng; use rand_v8::Rng as _; use std::str::FromStr as _; @@ -205,9 +193,6 @@ async fn handle_outgoing_message( return Ok(()); } - log::info!("sending validation authentication request to {peer_id}"); - - // first, dial the worker // ensure there's no ongoing challenge // use write-lock to make this atomic until we finish sending the auth request and writing to the map let mut ongoing_auth_requests = context.ongoing_auth_requests.write().await; @@ -217,25 +202,14 @@ async fn handle_outgoing_message( let multiaddrs = multiaddrs .iter() - .filter_map(|addr| p2p::Multiaddr::from_str(addr).ok()?.with_p2p(peer_id).ok()) + .filter_map( + |addr| p2p::Multiaddr::from_str(addr).ok(), /* ?.with_p2p(peer_id).ok()*/ + ) .collect::>(); if multiaddrs.is_empty() { bail!("no valid multiaddrs for peer id {peer_id}"); } - // TODO: we can improve this by checking if we're already connected to the peer before dialing - let (res_tx, res_rx) = tokio::sync::oneshot::channel(); - dial_tx - .send((multiaddrs.clone(), res_tx)) - .await - .context("failed to send dial request")?; - log::info!("dialing worker {peer_id} with multiaddrs: {multiaddrs:?}"); - res_rx - .await - .context("failed to receive dial response")? 
- .context("failed to dial worker")?; - log::info!("dialed worker {peer_id} with multiaddrs: {multiaddrs:?}"); - // create the authentication challenge request message let challenge_bytes: [u8; 32] = OsRng.gen(); let auth_challenge_message: String = hex::encode(challenge_bytes); diff --git a/crates/validator/src/validators/hardware_challenge.rs b/crates/validator/src/validators/hardware_challenge.rs index 6d8df988..0c570ce0 100644 --- a/crates/validator/src/validators/hardware_challenge.rs +++ b/crates/validator/src/validators/hardware_challenge.rs @@ -65,14 +65,10 @@ impl HardwareChallenge { .await .context("failed to send hardware challenge request to p2p service")?; - info!("hardware challenge sent to node {}", node.id); - let resp = response_rx .await .context("failed to receive response from node")?; - info!("response received from node {}: {:?}", node.id, resp); - if challenge_expected.result == resp.result { info!("Challenge for node {} successful", node.id); } else { diff --git a/crates/worker/src/p2p/mod.rs b/crates/worker/src/p2p/mod.rs index c8464a7a..94fe10a3 100644 --- a/crates/worker/src/p2p/mod.rs +++ b/crates/worker/src/p2p/mod.rs @@ -112,7 +112,7 @@ fn build_p2p_node( port: u16, cancellation_token: CancellationToken, ) -> Result<(Node, Receiver, Sender)> { - let (node, _, incoming_message_rx, outgoing_message_tx) = NodeBuilder::new() + let (node, incoming_message_rx, outgoing_message_tx) = NodeBuilder::new() .with_keypair(keypair) .with_port(port) .with_authentication() From f87d5d3f894e72ce30d37c24621475333ecb983c Mon Sep 17 00:00:00 2001 From: elizabeth Date: Fri, 11 Jul 2025 11:33:03 -0400 Subject: [PATCH 34/38] remove println --- crates/validator/src/validators/hardware_challenge.rs | 5 ----- 1 file changed, 5 deletions(-) diff --git a/crates/validator/src/validators/hardware_challenge.rs b/crates/validator/src/validators/hardware_challenge.rs index 0c570ce0..6970355d 100644 --- a/crates/validator/src/validators/hardware_challenge.rs +++ 
b/crates/validator/src/validators/hardware_challenge.rs @@ -30,11 +30,6 @@ impl HardwareChallenge { .clone() .ok_or_else(|| anyhow::anyhow!("Node {} does not have P2P addresses", node.id))?; - println!( - "Challenging node {} with P2P ID: {} and addresses: {:?}", - node.id, p2p_id, p2p_addresses - ); - // create random challenge matrix let challenge_matrix = self.random_challenge(3, 3, 3, 3); let challenge_expected = p2p::calc_matrix(&challenge_matrix); From d77ef0492dcf882a067f9256b0d078e75f652c77 Mon Sep 17 00:00:00 2001 From: elizabeth Date: Fri, 11 Jul 2025 11:43:12 -0400 Subject: [PATCH 35/38] fix unit tests --- crates/worker/src/cli/command.rs | 12 +++++- crates/worker/src/docker/taskbridge/bridge.rs | 10 ++--- crates/worker/src/state/system_state.rs | 38 ++++++++++--------- 3 files changed, 36 insertions(+), 24 deletions(-) diff --git a/crates/worker/src/cli/command.rs b/crates/worker/src/cli/command.rs index 539de1ae..1e9e5825 100644 --- a/crates/worker/src/cli/command.rs +++ b/crates/worker/src/cli/command.rs @@ -220,11 +220,19 @@ pub async fn execute_command( ); std::process::exit(1); } - let state = Arc::new(SystemState::new( + let state = match SystemState::new( state_dir_overwrite.clone(), *disable_state_storing, *compute_pool_id, - )); + ) { + Ok(state) => state, + Err(e) => { + error!("❌ Failed to initialize system state: {e}"); + std::process::exit(1); + } + }; + + let state = Arc::new(state); let private_key_provider = if let Some(key) = private_key_provider { Console::warning("Using private key from command line is not recommended. 
Consider using PRIVATE_KEY_PROVIDER environment variable instead."); diff --git a/crates/worker/src/docker/taskbridge/bridge.rs b/crates/worker/src/docker/taskbridge/bridge.rs index 80b8aee7..4765ef06 100644 --- a/crates/worker/src/docker/taskbridge/bridge.rs +++ b/crates/worker/src/docker/taskbridge/bridge.rs @@ -473,7 +473,7 @@ mod tests { let temp_dir = tempdir()?; let socket_path = temp_dir.path().join("test.sock"); let metrics_store = Arc::new(MetricsStore::new()); - let state = Arc::new(SystemState::new(None, false, 0)); + let state = Arc::new(SystemState::new(None, false, 0).unwrap()); let bridge = TaskBridge::new( Some(socket_path.to_str().unwrap()), metrics_store.clone(), @@ -506,7 +506,7 @@ mod tests { let temp_dir = tempdir()?; let socket_path = temp_dir.path().join("test.sock"); let metrics_store = Arc::new(MetricsStore::new()); - let state = Arc::new(SystemState::new(None, false, 0)); + let state = Arc::new(SystemState::new(None, false, 0).unwrap()); let bridge = TaskBridge::new( Some(socket_path.to_str().unwrap()), metrics_store.clone(), @@ -541,7 +541,7 @@ mod tests { let temp_dir = tempdir()?; let socket_path = temp_dir.path().join("test.sock"); let metrics_store = Arc::new(MetricsStore::new()); - let state = Arc::new(SystemState::new(None, false, 0)); + let state = Arc::new(SystemState::new(None, false, 0).unwrap()); let bridge = TaskBridge::new( Some(socket_path.to_str().unwrap()), metrics_store.clone(), @@ -590,7 +590,7 @@ mod tests { let temp_dir = tempdir()?; let socket_path = temp_dir.path().join("test.sock"); let metrics_store = Arc::new(MetricsStore::new()); - let state = Arc::new(SystemState::new(None, false, 0)); + let state = Arc::new(SystemState::new(None, false, 0).unwrap()); let bridge = TaskBridge::new( Some(socket_path.to_str().unwrap()), metrics_store.clone(), @@ -639,7 +639,7 @@ mod tests { let temp_dir = tempdir()?; let socket_path = temp_dir.path().join("test.sock"); let metrics_store = Arc::new(MetricsStore::new()); - let state 
= Arc::new(SystemState::new(None, false, 0)); + let state = Arc::new(SystemState::new(None, false, 0).unwrap()); let bridge = TaskBridge::new( Some(socket_path.to_str().unwrap()), metrics_store.clone(), diff --git a/crates/worker/src/state/system_state.rs b/crates/worker/src/state/system_state.rs index bed32693..39955de8 100644 --- a/crates/worker/src/state/system_state.rs +++ b/crates/worker/src/state/system_state.rs @@ -1,3 +1,4 @@ +use anyhow::bail; use anyhow::Result; use directories::ProjectDirs; use log::debug; @@ -60,7 +61,7 @@ impl SystemState { state_dir: Option, disable_state_storing: bool, compute_pool_id: u32, - ) -> Self { + ) -> Result { let default_state_dir = get_default_state_dir(); debug!("Default state dir: {default_state_dir:?}"); let state_path = state_dir @@ -84,7 +85,7 @@ impl SystemState { endpoint = loaded_state.endpoint; p2p_keypair = Some(loaded_state.p2p_keypair); } else { - debug!("Failed to load state from {state_file:?}"); + bail!("failed to load state from {state_file:?}"); } } } @@ -93,7 +94,7 @@ impl SystemState { p2p_keypair = Some(p2p::Keypair::generate_ed25519()); } - Self { + Ok(Self { last_heartbeat: Arc::new(RwLock::new(None)), is_running: Arc::new(RwLock::new(false)), endpoint: Arc::new(RwLock::new(endpoint)), @@ -101,7 +102,7 @@ impl SystemState { disable_state_storing, compute_pool_id, p2p_keypair: p2p_keypair.expect("p2p keypair must be Some at this point"), - } + }) } fn save_state(&self, heartbeat_endpoint: Option) -> Result<()> { @@ -141,8 +142,7 @@ impl SystemState { match serde_json::from_str(&contents) { Ok(state) => return Ok(Some(state)), Err(e) => { - debug!("Error parsing state file: {e}"); - return Ok(None); + bail!("failed to parse state file: {e}"); } } } @@ -232,7 +232,8 @@ mod tests { Some(temp_dir.path().to_string_lossy().to_string()), false, 0, - ); + ) + .unwrap(); let _ = state .set_running(true, Some("http://localhost:8080/heartbeat".to_string())) .await; @@ -255,30 +256,33 @@ mod tests { let 
state_file = temp_dir.path().join(STATE_FILENAME); fs::write(&state_file, "invalid_toml_content").expect("Failed to write to state file"); - let state = SystemState::new( + assert!(SystemState::new( Some(temp_dir.path().to_string_lossy().to_string()), false, 0, - ); - assert!(!(state.is_running().await)); - assert_eq!(state.get_heartbeat_endpoint().await, None); + ) + .is_err()); } #[tokio::test] async fn test_load_state() { + let keypair = p2p::Keypair::generate_ed25519(); + let state = PersistedSystemState { + endpoint: Some("http://localhost:8080/heartbeat".to_string()), + p2p_keypair: keypair, + }; + let serialized = serde_json::to_string_pretty(&state).unwrap(); + let temp_dir = setup_test_dir(); let state_file = temp_dir.path().join(STATE_FILENAME); - fs::write( - &state_file, - r#"{"endpoint": "http://localhost:8080/heartbeat"}"#, - ) - .expect("Failed to write to state file"); + fs::write(&state_file, serialized).unwrap(); let state = SystemState::new( Some(temp_dir.path().to_string_lossy().to_string()), false, 0, - ); + ) + .unwrap(); assert_eq!( state.get_heartbeat_endpoint().await, Some("http://localhost:8080/heartbeat".to_string()) From 8663553ff840b0dcfa2db1c088fc18f9e991bed6 Mon Sep 17 00:00:00 2001 From: elizabeth Date: Fri, 11 Jul 2025 14:57:18 -0400 Subject: [PATCH 36/38] address some comments --- crates/p2p/src/behaviour.rs | 4 +++- crates/shared/src/p2p/service.rs | 2 +- crates/validator/src/p2p/mod.rs | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/crates/p2p/src/behaviour.rs b/crates/p2p/src/behaviour.rs index 399693b5..76f959e8 100644 --- a/crates/p2p/src/behaviour.rs +++ b/crates/p2p/src/behaviour.rs @@ -19,6 +19,8 @@ use crate::message::{Request, Response}; use crate::Protocols; use crate::PRIME_STREAM_PROTOCOL; +const DEFAULT_MAX_PEER_COUNT: u32 = 100; + #[derive(NetworkBehaviour)] #[behaviour(to_swarm = "BehaviourEvent")] pub(crate) struct Behaviour { @@ -109,7 +111,7 @@ impl Behaviour { let autonat = 
autonat::Behaviour::new(peer_id, autonat::Config::default()); let connection_limits = connection_limits::Behaviour::new( - ConnectionLimits::default().with_max_established(Some(100)), + ConnectionLimits::default().with_max_established(Some(DEFAULT_MAX_PEER_COUNT)), ); let mdns = mdns::tokio::Behaviour::new(mdns::Config::default(), peer_id) diff --git a/crates/shared/src/p2p/service.rs b/crates/shared/src/p2p/service.rs index 9223bc3d..73172927 100644 --- a/crates/shared/src/p2p/service.rs +++ b/crates/shared/src/p2p/service.rs @@ -375,7 +375,7 @@ async fn handle_validation_authentication_response( let Ok(recovered_address) = parsed_signature .recover_address_from_msg(&ongoing_challenge.auth_challenge_request_message) else { - bail!("Failed to recover address from response signature") + bail!("failed to recover address from response signature") }; // verify the recovered address matches the expected worker wallet address diff --git a/crates/validator/src/p2p/mod.rs b/crates/validator/src/p2p/mod.rs index 6fa8fac7..7d199eb2 100644 --- a/crates/validator/src/p2p/mod.rs +++ b/crates/validator/src/p2p/mod.rs @@ -10,7 +10,7 @@ use tokio_util::sync::CancellationToken; pub struct Service { inner: P2PService, - // converts incoming hardware challenges to outgoing requests + // converts this validator's hardware challenges to outgoing requests to workers outgoing_message_tx: Sender, hardware_challenge_rx: Receiver, } From 4f66957863f576e45e4bb8915aeecc5ca2d4bcae Mon Sep 17 00:00:00 2001 From: elizabeth Date: Fri, 11 Jul 2025 15:00:28 -0400 Subject: [PATCH 37/38] store outbound request when already authenticated --- crates/shared/src/p2p/service.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/crates/shared/src/p2p/service.rs b/crates/shared/src/p2p/service.rs index 73172927..3d94a460 100644 --- a/crates/shared/src/p2p/service.rs +++ b/crates/shared/src/p2p/service.rs @@ -184,6 +184,9 @@ async fn handle_outgoing_message(message: OutgoingRequest, 
context: Context) -> log::debug!( "already authenticated with peer {peer_id}, skipping validation authentication" ); + let mut ongoing_outbound_requests = context.ongoing_outbound_requests.write().await; + ongoing_outbound_requests.insert((peer_id, request.protocol()), response_tx); + // multiaddresses are already known, as we've connected to them previously context .outgoing_messages @@ -309,7 +312,7 @@ async fn handle_incoming_response( bail!("received GetTaskLogsResponse from {from}, but get task logs protocol is not enabled"); } - log::debug!("received GetTaskLogsResponse from {from}: {resp:?}"); + log::info!("received GetTaskLogsResponse from {from}: {resp:?}"); let mut ongoing_outbound_requests = context.ongoing_outbound_requests.write().await; let Some(response_tx) = ongoing_outbound_requests.remove(&(from, Protocol::GetTaskLogs)) From d712de6ffc9c9696edb65e7b3f4f459095a35e4d Mon Sep 17 00:00:00 2001 From: elizabeth Date: Fri, 11 Jul 2025 15:01:11 -0400 Subject: [PATCH 38/38] use debug log --- crates/shared/src/p2p/service.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/shared/src/p2p/service.rs b/crates/shared/src/p2p/service.rs index 3d94a460..bf776009 100644 --- a/crates/shared/src/p2p/service.rs +++ b/crates/shared/src/p2p/service.rs @@ -312,7 +312,7 @@ async fn handle_incoming_response( bail!("received GetTaskLogsResponse from {from}, but get task logs protocol is not enabled"); } - log::info!("received GetTaskLogsResponse from {from}: {resp:?}"); + log::debug!("received GetTaskLogsResponse from {from}: {resp:?}"); let mut ongoing_outbound_requests = context.ongoing_outbound_requests.write().await; let Some(response_tx) = ongoing_outbound_requests.remove(&(from, Protocol::GetTaskLogs))