From cefade9c2e81b02696047b063657c459ecc8b272 Mon Sep 17 00:00:00 2001 From: shijing xian Date: Tue, 25 Nov 2025 09:30:25 -0800 Subject: [PATCH 1/5] initial commit for video stream and other hooks --- CMakeLists.txt | 2 + include/livekit/livekit.h | 1 + include/livekit/local_track_publication.h | 3 +- include/livekit/participant.h | 20 +- include/livekit/remote_participant.h | 60 +++++ include/livekit/remote_track_publication.h | 2 + include/livekit/room.h | 4 + include/livekit/track_publication.h | 2 +- include/livekit/video_stream.h | 86 ++++++++ src/remote_participant.cpp | 48 ++++ src/room.cpp | 135 +++++++++++- src/room_proto_converter.cpp | 10 +- src/video_stream.cpp | 241 +++++++++++++++++++++ 13 files changed, 604 insertions(+), 10 deletions(-) create mode 100644 include/livekit/remote_participant.h create mode 100644 include/livekit/video_stream.h create mode 100644 src/remote_participant.cpp create mode 100644 src/video_stream.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 07fcb34..9dab685 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -166,6 +166,7 @@ add_library(livekit include/livekit/remote_audio_track.h include/livekit/participant.h include/livekit/local_participant.h + include/livekit/remote_participant.h include/livekit/livekit.h include/livekit/stats.h include/livekit/track.h @@ -186,6 +187,7 @@ add_library(livekit src/room_proto_converter.cpp src/room_proto_converter.h src/local_participant.cpp + src/remote_participant.cpp src/stats.cpp src/track.cpp src/track_proto_converter.cpp diff --git a/include/livekit/livekit.h b/include/livekit/livekit.h index 23d63c4..4b5fc30 100644 --- a/include/livekit/livekit.h +++ b/include/livekit/livekit.h @@ -21,6 +21,7 @@ #include "local_track_publication.h" #include "local_video_track.h" #include "participant.h" +#include "remote_participant.h" #include "room.h" #include "room_delegate.h" #include "track_publication.h" diff --git a/include/livekit/local_track_publication.h b/include/livekit/local_track_publication.h index b53e4f5..3e8c6ed 100644 --- a/include/livekit/local_track_publication.h +++ b/include/livekit/local_track_publication.h @@ -28,7 +28,8 @@ class Track; class LocalTrackPublication : public TrackPublication { public: - /// Construct from an OwnedTrackPublication proto. + /// Note: this LocalTrackPublication is constructed internally only, + /// so it is safe to accept a proto::OwnedTrackPublication. explicit LocalTrackPublication(const proto::OwnedTrackPublication &owned); /// Typed accessor for the attached LocalTrack (if any).
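A minimal consumer sketch for the VideoStream API that the video_stream.h diff below introduces, assuming a std::shared_ptr<Track> obtained from a subscribed remote publication; consumeVideo, the capacity value, and the logging are illustrative placeholders rather than SDK code.

#include <iostream>
#include <memory>

#include "livekit/video_stream.h"

// Sketch only: `track` would come from a subscribed RemoteTrackPublication.
void consumeVideo(const std::shared_ptr<livekit::Track> &track) {
  livekit::VideoStream::Options options;
  options.capacity = 4;  // keep at most 4 queued frames (0 = unbounded)

  auto stream = livekit::VideoStream::fromTrack(track, options);

  // Assumes VideoFrameEvent is default-constructible; otherwise seed it with a
  // placeholder frame before entering the loop.
  livekit::VideoFrameEvent ev;

  // read() blocks until a frame is queued, the stream reaches EOS, or close()
  // is called; it returns false once the stream has ended.
  while (stream->read(ev)) {
    std::cout << "frame at " << ev.timestamp_us << " us, rotation="
              << static_cast<int>(ev.rotation) << "\n";
  }

  stream->close();  // idempotent; the destructor also closes the stream
}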
diff --git a/include/livekit/participant.h b/include/livekit/participant.h index 826263b..a0003fe 100644 --- a/include/livekit/participant.h +++ b/include/livekit/participant.h @@ -41,7 +41,7 @@ class Participant { metadata_(std::move(metadata)), attributes_(std::move(attributes)), kind_(kind), reason_(reason) {} - // Plain getters/setters (caller ensures threading) + // Plain getters (caller ensures threading) const std::string &sid() const noexcept { return sid_; } const std::string &name() const noexcept { return name_; } const std::string &identity() const noexcept { return identity_; } @@ -55,6 +55,24 @@ class Participant { uintptr_t ffiHandleId() const noexcept { return handle_.get(); } + // Setters (caller ensures threading) + void set_name(std::string name) noexcept { name_ = std::move(name); } + void set_metadata(std::string metadata) noexcept { + metadata_ = std::move(metadata); + } + void + set_attributes(std::unordered_map attrs) noexcept { + attributes_ = std::move(attrs); + } + void set_attribute(const std::string &key, const std::string &value) { + attributes_[key] = value; + } + void remove_attribute(const std::string &key) { attributes_.erase(key); } + void set_kind(ParticipantKind kind) noexcept { kind_ = kind; } + void set_disconnect_reason(DisconnectReason reason) noexcept { + reason_ = reason; + } + private: FfiHandle handle_; std::string sid_, name_, identity_, metadata_; diff --git a/include/livekit/remote_participant.h b/include/livekit/remote_participant.h new file mode 100644 index 0000000..d1c1579 --- /dev/null +++ b/include/livekit/remote_participant.h @@ -0,0 +1,60 @@ +/* + * Copyright 2025 LiveKit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an “AS IS” BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "participant.h" + +#include +#include +#include + +namespace livekit { + +class RemoteTrackPublication; + +class RemoteParticipant : public Participant { +public: + using TrackPublicationMap = + std::unordered_map>; + + RemoteParticipant(FfiHandle handle, std::string sid, std::string name, + std::string identity, std::string metadata, + std::unordered_map attributes, + ParticipantKind kind, DisconnectReason reason); + + // A dictionary of track publications associated with the participant. + const TrackPublicationMap &track_publications() const noexcept { + return track_publications_; + } + + // Optional: non-const access if you want to mutate in-place. 
+ TrackPublicationMap &mutable_track_publications() noexcept { + return track_publications_; + } + + // C++ equivalent of Python's __repr__ + std::string to_string() const; + +private: + TrackPublicationMap track_publications_; +}; + +// Convenience for logging / streaming +std::ostream &operator<<(std::ostream &os, + const RemoteParticipant &participant); + +} // namespace livekit diff --git a/include/livekit/remote_track_publication.h b/include/livekit/remote_track_publication.h index 9066058..aa39408 100644 --- a/include/livekit/remote_track_publication.h +++ b/include/livekit/remote_track_publication.h @@ -28,6 +28,8 @@ class Track; class RemoteTrackPublication : public TrackPublication { public: + /// Note, this RemoteTrackPublication is constructed internally only; + /// safe to accept proto::OwnedTrackPublication. explicit RemoteTrackPublication(const proto::OwnedTrackPublication &owned); /// Typed accessor for the attached RemoteTrack (if any). diff --git a/include/livekit/room.h b/include/livekit/room.h index 54f0d99..98077ed 100644 --- a/include/livekit/room.h +++ b/include/livekit/room.h @@ -32,6 +32,7 @@ class FfiEvent; } class LocalParticipant; +class RemoteParticipant; class Room { public: @@ -43,6 +44,7 @@ class Room { // Accessors RoomInfoData room_info() const; LocalParticipant *local_participant() const; + RemoteParticipant *remote_participant(const std::string &identity) const; private: mutable std::mutex lock_; @@ -51,6 +53,8 @@ class Room { RoomInfoData room_info_; std::shared_ptr room_handle_; std::unique_ptr local_participant_; + std::unordered_map> + remote_participants_; void OnEvent(const proto::FfiEvent &event); }; diff --git a/include/livekit/track_publication.h b/include/livekit/track_publication.h index 9536437..8503188 100644 --- a/include/livekit/track_publication.h +++ b/include/livekit/track_publication.h @@ -22,7 +22,7 @@ #include #include "livekit/ffi_handle.h" -#include "livekit/track.h" // TrackKind, TrackSource, AudioTrackFeature +#include "livekit/track.h" namespace livekit { diff --git a/include/livekit/video_stream.h b/include/livekit/video_stream.h new file mode 100644 index 0000000..2008358 --- /dev/null +++ b/include/livekit/video_stream.h @@ -0,0 +1,86 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "ffi_handle.h" +#include "participant.h" +#include "track.h" +#include "video_frame.h" + +namespace livekit { + +// C++ equivalent of Python VideoFrameEvent +struct VideoFrameEvent { + VideoFrame frame; + std::int64_t timestamp_us; + VideoRotation rotation; +}; + +class VideoStream { +public: + struct Options { + std::size_t capacity{0}; // 0 = unbounded + std::optional format; // optional pixel format + }; + + // Factory: create a VideoStream bound to a specific Track + static std::unique_ptr + fromTrack(const std::shared_ptr &track, + const Options &options = Options{}); + + // Factory: create a VideoStream from a Participant + TrackSource + static std::unique_ptr + fromParticipant(Participant &participant, TrackSource track_source, + const Options &options = Options{}); + + ~VideoStream(); + + VideoStream(const VideoStream &) = delete; + VideoStream &operator=(const VideoStream &) = delete; + VideoStream(VideoStream &&) noexcept; + VideoStream &operator=(VideoStream &&) noexcept; + + /// Blocking read: returns true if a frame was delivered, + /// false if the stream has ended (EOS or closed). + bool read(VideoFrameEvent &out); + + /// Signal that we are no longer interested in frames. 
+ /// Disposes the underlying FFI stream and drains internal listener. + void close(); + +private: + VideoStream() = default; + + // Internal init helpers, used by the factories + void initFromTrack(const std::shared_ptr &track, + const Options &options); + void initFromParticipant(Participant &participant, TrackSource source, + const Options &options); + + // FFI event handler (registered with FfiClient) + void onFfiEvent(const FfiEvent &event); + + // Queue helpers + void pushFrame(VideoFrameEvent &&ev); + void pushEos(); + + mutable std::mutex mutex_; + std::condition_variable cv_; + std::deque queue_; + std::size_t capacity_{0}; + bool eof_{false}; + bool closed_{false}; + + // Underlying FFI handle for the video stream + FfiHandle stream_handle_; + + // Listener id registered on FfiClient + std::int64_t listener_id_{0}; +}; + +} // namespace livekit diff --git a/src/remote_participant.cpp b/src/remote_participant.cpp new file mode 100644 index 0000000..38def8e --- /dev/null +++ b/src/remote_participant.cpp @@ -0,0 +1,48 @@ +/* + * Copyright 2025 LiveKit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an “AS IS” BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "livekit/remote_participant.h" + +#include +#include +#include + +namespace livekit { + +RemoteParticipant::RemoteParticipant( + FfiHandle handle, std::string sid, std::string name, std::string identity, + std::string metadata, + std::unordered_map attributes, + ParticipantKind kind, DisconnectReason reason) + : Participant(std::move(handle), std::move(sid), std::move(name), + std::move(identity), std::move(metadata), + std::move(attributes), kind, reason), + track_publications_() {} + +std::string RemoteParticipant::to_string() const { + std::ostringstream oss; + oss << "rtc.RemoteParticipant(sid=" << sid() << ", identity=" << identity() + << ", name=" << name() << ")"; + return oss.str(); +} + +std::ostream &operator<<(std::ostream &os, + const RemoteParticipant &participant) { + os << participant.to_string(); + return os; +} + +} // namespace livekit diff --git a/src/room.cpp b/src/room.cpp index 4cfa3c0..9f56cdb 100644 --- a/src/room.cpp +++ b/src/room.cpp @@ -18,6 +18,9 @@ #include "livekit/ffi_client.h" #include "livekit/local_participant.h" +#include "livekit/local_track_publication.h" +#include "livekit/remote_participant.h" +#include "livekit/remote_track_publication.h" #include "livekit/room_delegate.h" #include "ffi.pb.h" @@ -36,6 +39,25 @@ using proto::FfiRequest; using proto::FfiResponse; using proto::RoomOptions; +namespace { + +std::unique_ptr +createRemoteParticipant(const proto::OwnedParticipant &owned) { + const auto &pinfo = owned.info(); + std::unordered_map attrs; + attrs.reserve(pinfo.attributes_size()); + for (const auto &kv : pinfo.attributes()) { + attrs.emplace(kv.first, kv.second); + } + auto kind = livekit::fromProto(pinfo.kind()); + auto reason = livekit::toDisconnectReason(pinfo.disconnect_reason()); + livekit::FfiHandle handle(static_cast(owned.handle().id())); + return std::make_unique( + std::move(handle), pinfo.sid(), 
pinfo.name(), pinfo.identity(), + pinfo.metadata(), std::move(attrs), kind, reason); +} + +} // namespace Room::Room() {} Room::~Room() {} @@ -87,10 +109,25 @@ bool Room::Connect(const std::string &url, const std::string &token) { std::move(participant_handle), pinfo.sid(), pinfo.name(), pinfo.identity(), pinfo.metadata(), std::move(attrs), kind, reason); } - // Setup remote particpants + // Setup remote participants { - // TODO, implement this remote participant feature + const auto &participants = connectCb.result().participants(); + std::lock_guard g(lock_); + for (const auto &pt : participants) { + const auto &owned = pt.participant(); + auto rp = createRemoteParticipant(owned); + // Add the initial remote participant tracks (like Python does) + for (const auto &owned_publication_info : pt.publications()) { + auto publication = + std::make_shared(owned_publication_info); + rp->mutable_track_publications().emplace(publication->sid(), + std::move(publication)); + } + + remote_participants_.emplace(rp->identity(), std::move(rp)); + } } + return true; } catch (const std::exception &e) { // On error, remove the listener and rethrow @@ -110,6 +147,12 @@ LocalParticipant *Room::local_participant() const { return local_participant_.get(); } +RemoteParticipant *Room::remote_participant(const std::string &identity) const { + std::lock_guard g(lock_); + auto it = remote_participants_.find(identity); + return it == remote_participants_.end() ? nullptr : it->second.get(); +} + void Room::OnEvent(const FfiEvent &event) { // Take a snapshot of the delegate under lock, but do NOT call it under the // lock. @@ -136,11 +179,37 @@ void Room::OnEvent(const FfiEvent &event) { switch (re.message_case()) { case proto::RoomEvent::kParticipantConnected: { auto ev = fromProto(re.participant_connected()); + std::cout << "kParticipantConnected " << std::endl; + // Create and register RemoteParticipant + { + std::lock_guard guard(lock_); + auto rp = createRemoteParticipant(re.participant_connected().info()); + remote_participants_.emplace(rp->identity(), std::move(rp)); + } + // TODO, use better public callback events delegate_snapshot->onParticipantConnected(*this, ev); + break; } case proto::RoomEvent::kParticipantDisconnected: { auto ev = fromProto(re.participant_disconnected()); + { + std::lock_guard guard(lock_); + const auto &pd = re.participant_disconnected(); + const std::string &identity = pd.participant_identity(); + auto it = remote_participants_.find(identity); + if (it != remote_participants_.end()) { + remote_participants_.erase(it); + } else { + // We saw a disconnect event for a participant we don't track + // internally. This can happen on races or if we never created a + // RemoteParticipant + std::cerr << "participant_disconnected for unknown identity: " + << identity << std::endl; + } + } + // TODO, should we trigger onParticipantDisconnected if remote + // participants can't be found ? 
delegate_snapshot->onParticipantDisconnected(*this, ev); break; } @@ -161,6 +230,28 @@ void Room::OnEvent(const FfiEvent &event) { } case proto::RoomEvent::kTrackPublished: { auto ev = fromProto(re.track_published()); + { + std::lock_guard guard(lock_); + const auto &tp = re.track_published(); + const std::string &identity = tp.participant_identity(); + auto it = remote_participants_.find(identity); + if (it != remote_participants_.end()) { + RemoteParticipant *rparticipant = it->second.get(); + const auto &owned_publication = tp.publication(); + auto rpublication = + std::make_shared(owned_publication); + // Store it on the participant, keyed by SID + rparticipant->mutable_track_publications().emplace( + rpublication->sid(), std::move(rpublication)); + + } else { + // Optional: log if we get a track for an unknown participant + std::cerr << "track_published for unknown participant: " << identity + << "\n"; + // Don't emit the + break; + } + } delegate_snapshot->onTrackPublished(*this, ev); break; } @@ -322,6 +413,46 @@ void Room::OnEvent(const FfiEvent &event) { } case proto::RoomEvent::kParticipantsUpdated: { auto ev = fromProto(re.participants_updated()); + { + std::lock_guard guard(lock_); + const auto &pu = re.participants_updated(); + for (const auto &info : pu.participants()) { + const std::string &identity = info.identity(); + Participant *participant = nullptr; + // First, check local participant. + if (local_participant_ && + identity == local_participant_->identity()) { + participant = local_participant_.get(); + } else { + // Otherwise, look for a remote participant. + auto it = remote_participants_.find(identity); + if (it != remote_participants_.end()) { + participant = it->second.get(); + } + } + + if (!participant) { + // Participant might not exist yet; ignore for now. + std::cerr << "Room::RoomEvent::kParticipantsUpdated participant " + "does not exist: " + << identity << std::endl; + continue; + } + + // Update basic fields + participant->set_name(info.name()); + participant->set_metadata(info.metadata()); + std::unordered_map attrs; + attrs.reserve(info.attributes_size()); + for (const auto &kv : info.attributes()) { + attrs.emplace(kv.first, kv.second); + } + participant->set_attributes(std::move(attrs)); + participant->set_kind(fromProto(info.kind())); + participant->set_disconnect_reason( + toDisconnectReason(info.disconnect_reason())); + } + } delegate_snapshot->onParticipantsUpdated(*this, ev); break; } diff --git a/src/room_proto_converter.cpp b/src/room_proto_converter.cpp index 1df125c..5423f07 100644 --- a/src/room_proto_converter.cpp +++ b/src/room_proto_converter.cpp @@ -215,12 +215,12 @@ DataStreamTrailerData fromProto(const proto::DataStream_Trailer &in) { // --------- event conversions --------- -ParticipantConnectedEvent -fromProto(const proto::ParticipantConnected & /*in*/) { +ParticipantConnectedEvent fromProto(const proto::ParticipantConnected &in) { ParticipantConnectedEvent ev; - // in.info() is OwnedParticipant; you can fill more fields once you inspect - // it. For now, leave metadata/name/identity as TODO. - // TODO: map in.info().info().identity(), name(), metadata(), etc. 
+ const auto &pinfo = in.info().info(); + ev.identity = pinfo.identity(); + ev.name = pinfo.name(); + ev.metadata = pinfo.metadata(); return ev; } diff --git a/src/video_stream.cpp b/src/video_stream.cpp new file mode 100644 index 0000000..42f28d9 --- /dev/null +++ b/src/video_stream.cpp @@ -0,0 +1,241 @@ +#include "video_stream.h" + +#include + +#include "ffi_client.h" +// Include your actual generated proto headers here: +#include "proto/ffi_rpc.pb.h" +#include "proto/video_frame.pb.h" + +namespace livekit { + +using proto::FfiEvent; +using proto::FfiRequest; +using proto::VideoStreamEvent; + +// ------------------------ +// Factory helpers +// ------------------------ + +std::unique_ptr +VideoStream::fromTrack(const std::shared_ptr &track, + const Options &options) { + auto stream = std::unique_ptr(new VideoStream()); + stream->initFromTrack(track, options); + return stream; +} + +std::unique_ptr +VideoStream::fromParticipant(Participant &participant, TrackSource track_source, + const Options &options) { + auto stream = std::unique_ptr(new VideoStream()); + stream->initFromParticipant(participant, track_source, options); + return stream; +} + +// ------------------------ +// Destructor / move +// ------------------------ + +VideoStream::~VideoStream() { close(); } + +VideoStream::VideoStream(VideoStream &&other) noexcept { + std::lock_guard lock(other.mutex_); + queue_ = std::move(other.queue_); + capacity_ = other.capacity_; + eof_ = other.eof_; + closed_ = other.closed_; + stream_handle_ = std::move(other.stream_handle_); + listener_id_ = other.listener_id_; + + other.listener_id_ = 0; + other.closed_ = true; +} + +VideoStream &VideoStream::operator=(VideoStream &&other) noexcept { + if (this == &other) + return *this; + + close(); + + { + std::lock_guard lock_this(mutex_); + std::lock_guard lock_other(other.mutex_); + + queue_ = std::move(other.queue_); + capacity_ = other.capacity_; + eof_ = other.eof_; + closed_ = other.closed_; + stream_handle_ = std::move(other.stream_handle_); + listener_id_ = other.listener_id_; + + other.listener_id_ = 0; + other.closed_ = true; + } + + return *this; +} + +// ------------------------ +// Init internals +// ------------------------ + +void VideoStream::initFromTrack(const std::shared_ptr &track, + const Options &options) { + capacity_ = options.capacity; + + // 1) Subscribe to FFI events + listener_id_ = FfiClient::instance().AddListener( + [this](const FfiEvent &e) { this->onFfiEvent(e); }); + + // 2) Send FFI request to create a new video stream bound to this track + FfiRequest req; + auto *new_video_stream = req.mutable_new_video_stream(); + new_video_stream->set_track_handle(track->ffiHandleId()); + new_video_stream->set_type(proto::VideoStreamType::VIDEO_STREAM_NATIVE); + new_video_stream->set_normalize_stride(true); + if (options.format.has_value()) { + new_video_stream->set_format(static_cast(*options.format)); + } + + auto resp = FfiClient::instance().request(req); + // Adjust field names to match your proto exactly: + const auto &stream = resp.new_video_stream().stream(); + stream_handle_ = FfiHandle(static_cast(stream.handle().id())); + // stream.info() is available if you want to cache metadata. 
+} + +void VideoStream::initFromParticipant(Participant &participant, + TrackSource track_source, + const Options &options) { + capacity_ = options.capacity; + + // 1) Subscribe to FFI events + listener_id_ = FfiClient::instance().AddListener( + [this](const FfiEvent &e) { this->onFfiEvent(e); }); + + // 2) Send FFI request to create a video stream from participant + track + // source + FfiRequest req; + auto *vs = req.mutable_video_stream_from_participant(); + vs->set_participant_handle(participant.ffiHandleId()); + vs->set_type(proto::VideoStreamType::VIDEO_STREAM_NATIVE); + vs->set_track_source(static_cast(track_source)); + vs->set_normalize_stride(true); + if (options.format.has_value()) { + vs->set_format(static_cast(*options.format)); + } + + auto resp = FfiClient::instance().request(req); + // Adjust field names to match your proto exactly: + const auto &stream = resp.video_stream_from_participant().stream(); + stream_handle_ = FfiHandle(static_cast(stream.handle().id())); +} + +// ------------------------ +// Public API +// ------------------------ + +bool VideoStream::read(VideoFrameEvent &out) { + std::unique_lock lock(mutex_); + + cv_.wait(lock, [this] { return !queue_.empty() || eof_ || closed_; }); + + if (closed_ || (queue_.empty() && eof_)) { + return false; // EOS / closed + } + + out = std::move(queue_.front()); + queue_.pop_front(); + return true; +} + +void VideoStream::close() { + { + std::lock_guard lock(mutex_); + if (closed_) { + return; + } + closed_ = true; + } + + // Dispose FFI handle + if (stream_handle_.get() != 0) { + stream_handle_.dispose(); + } + + // Remove listener + if (listener_id_ != 0) { + FfiClient::instance().RemoveListener(listener_id_); + listener_id_ = 0; + } + + // Wake any waiting readers + cv_.notify_all(); +} + +// ------------------------ +// Internal helpers +// ------------------------ + +void VideoStream::onFfiEvent(const FfiEvent &event) { + // Filter for video_stream_event first. + if (event.message_case() != FfiEvent::kVideoStreamEvent) { + return; + } + + const auto &vse = event.video_stream_event(); + + // Check if this event is for our stream handle. + if (static_cast(vse.stream_handle().id()) != + stream_handle_.get()) { + return; + } + + // Handle frame_received or eos. + if (vse.has_frame_received()) { + const auto &fr = vse.frame_received(); + + // Convert owned buffer->VideoFrame via a helper. + // You should implement this static function in your VideoFrame class. + VideoFrame frame = VideoFrame::fromOwnedInfo(fr.buffer()); + + VideoFrameEvent ev{std::move(frame), fr.timestamp_us(), + static_cast(fr.rotation())}; + + pushFrame(std::move(ev)); + } else if (vse.has_eos()) { + pushEos(); + } +} + +void VideoStream::pushFrame(VideoFrameEvent &&ev) { + { + std::lock_guard lock(mutex_); + + if (closed_ || eof_) { + return; + } + + if (capacity_ > 0 && queue_.size() >= capacity_) { + // Ring behavior: drop oldest frame. 
+ queue_.pop_front(); + } + + queue_.push_back(std::move(ev)); + } + cv_.notify_one(); +} + +void VideoStream::pushEos() { + { + std::lock_guard lock(mutex_); + if (eof_) { + return; + } + eof_ = true; + } + cv_.notify_all(); +} + +} // namespace livekit From f872678866a9a83b2de01cbc1e585fa3066c7915 Mon Sep 17 00:00:00 2001 From: shijing xian Date: Tue, 25 Nov 2025 14:06:43 -0800 Subject: [PATCH 2/5] Get things functional --- CMakeLists.txt | 4 + include/livekit/audio_stream.h | 107 ++++++++++++++ include/livekit/video_frame.h | 20 +-- include/livekit/video_stream.h | 38 +++-- src/audio_stream.cpp | 254 +++++++++++++++++++++++++++++++++ src/video_frame.cpp | 85 +++++++---- src/video_stream.cpp | 41 +++--- src/video_utils.cpp | 20 +-- src/video_utils.h | 2 + 9 files changed, 485 insertions(+), 86 deletions(-) create mode 100644 include/livekit/audio_stream.h create mode 100644 src/audio_stream.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 9dab685..6baabde 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -158,6 +158,7 @@ add_custom_command( add_library(livekit include/livekit/audio_frame.h include/livekit/audio_source.h + include/livekit/audio_stream.h include/livekit/room.h include/livekit/room_delegate.h include/livekit/ffi_handle.h @@ -175,10 +176,12 @@ add_library(livekit include/livekit/remote_track_publication.h include/livekit/video_frame.h include/livekit/video_source.h + include/livekit/video_stream.h include/livekit/local_video_track.h include/livekit/remote_video_track.h src/audio_frame.cpp src/audio_source.cpp + src/audio_stream.cpp src/ffi_handle.cpp src/ffi_client.cpp src/local_audio_track.cpp @@ -197,6 +200,7 @@ add_library(livekit src/remote_track_publication.cpp src/video_frame.cpp src/video_source.cpp + src/video_stream.cpp src/local_video_track.cpp src/remote_video_track.cpp src/video_utils.cpp diff --git a/include/livekit/audio_stream.h b/include/livekit/audio_stream.h new file mode 100644 index 0000000..84aac8a --- /dev/null +++ b/include/livekit/audio_stream.h @@ -0,0 +1,107 @@ +/* + * Copyright 2025 LiveKit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an “AS IS” BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include "audio_frame.h" +#include "ffi_handle.h" +#include "participant.h" +#include "track.h" + +namespace livekit { + +namespace proto { +class FfiEvent; +} + +struct AudioFrameEvent { + AudioFrame frame; +}; + +class AudioStream { +public: + struct Options { + std::size_t capacity{0}; // 0 = unbounded + int sample_rate{48000}; + int num_channels{1}; + std::string noise_cancellation_module; // empty = disabled + std::string noise_cancellation_options_json; // empty = no options + }; + + // Factory: create an AudioStream bound to a specific Track + static std::unique_ptr + from_track(const std::shared_ptr &track, const Options &options); + + // Factory: create an AudioStream from a Participant + TrackSource + static std::unique_ptr from_participant(Participant &participant, + TrackSource track_source, + const Options &options); + + ~AudioStream(); + + AudioStream(const AudioStream &) = delete; + AudioStream &operator=(const AudioStream &) = delete; + AudioStream(AudioStream &&) noexcept; + AudioStream &operator=(AudioStream &&) noexcept; + + /// Blocking read: returns true if a frame was delivered, + /// false if the stream has ended (EOS or closed). + bool read(AudioFrameEvent &out_event); + + /// Signal that we are no longer interested in frames. + /// Disposes the underlying FFI stream and removes the listener. + void close(); + +private: + AudioStream() = default; + + void init_from_track(const std::shared_ptr &track, + const Options &options); + void init_from_participant(Participant &participant, TrackSource track_source, + const Options &options); + + // FFI event handler (registered with FfiClient) + void on_ffi_event(const proto::FfiEvent &event); + + // Queue helpers + void push_frame(AudioFrameEvent &&ev); + void push_eos(); + + mutable std::mutex mutex_; + std::condition_variable cv_; + std::deque queue_; + std::size_t capacity_{0}; + bool eof_{false}; + bool closed_{false}; + + Options options_; + + // Underlying FFI audio stream handle + FfiHandle stream_handle_; + + // Listener id registered on FfiClient + std::int64_t listener_id_{0}; +}; + +} // namespace livekit diff --git a/include/livekit/video_frame.h b/include/livekit/video_frame.h index 3ba4f16..a43dc21 100644 --- a/include/livekit/video_frame.h +++ b/include/livekit/video_frame.h @@ -44,6 +44,10 @@ struct VideoPlaneInfo { std::uint32_t size; // plane size in bytes }; +namespace proto { +class OwnedVideoBuffer; +} + /** * Public SDK representation of a video frame. * @@ -63,16 +67,6 @@ class LKVideoFrame { LKVideoFrame(LKVideoFrame &&) noexcept = default; LKVideoFrame &operator=(LKVideoFrame &&) noexcept = default; - /* LKVideoFrame(LKVideoFrame&& other) noexcept - : width_(other.width_), - height_(other.height_), - type_(other.type_), - data_(std::move(other.data_)) { - other.width_ = 0; - other.height_ = 0; - } - LKVideoFrame& operator=(LKVideoFrame&& other) noexcept;*/ - /** * Allocate a new frame with the correct buffer size for the given format. * Data is zero-initialized. @@ -123,6 +117,12 @@ class LKVideoFrame { */ LKVideoFrame convert(VideoBufferType dst, bool flip_y = false) const; +protected: + friend class VideoStream; + // Only internal classes (e.g., VideoStream) + // should construct frames directly from FFI buffers. 
+ static LKVideoFrame fromOwnedInfo(const proto::OwnedVideoBuffer &owned); + private: int width_; int height_; diff --git a/include/livekit/video_stream.h b/include/livekit/video_stream.h index 2008358..322ab1d 100644 --- a/include/livekit/video_stream.h +++ b/include/livekit/video_stream.h @@ -1,3 +1,19 @@ +/* + * Copyright 2025 LiveKit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an “AS IS” BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + #pragma once #include @@ -11,32 +27,36 @@ #include "participant.h" #include "track.h" #include "video_frame.h" +#include "video_source.h" namespace livekit { // C++ equivalent of Python VideoFrameEvent struct VideoFrameEvent { - VideoFrame frame; + LKVideoFrame frame; std::int64_t timestamp_us; VideoRotation rotation; }; +namespace proto { +class FfiEvent; +} + class VideoStream { public: struct Options { - std::size_t capacity{0}; // 0 = unbounded - std::optional format; // optional pixel format + std::size_t capacity{0}; // 0 = unbounded + VideoBufferType format; }; // Factory: create a VideoStream bound to a specific Track static std::unique_ptr - fromTrack(const std::shared_ptr &track, - const Options &options = Options{}); + fromTrack(const std::shared_ptr &track, const Options &options); // Factory: create a VideoStream from a Participant + TrackSource - static std::unique_ptr - fromParticipant(Participant &participant, TrackSource track_source, - const Options &options = Options{}); + static std::unique_ptr fromParticipant(Participant &participant, + TrackSource track_source, + const Options &options); ~VideoStream(); @@ -63,7 +83,7 @@ class VideoStream { const Options &options); // FFI event handler (registered with FfiClient) - void onFfiEvent(const FfiEvent &event); + void onFfiEvent(const proto::FfiEvent &event); // Queue helpers void pushFrame(VideoFrameEvent &&ev); diff --git a/src/audio_stream.cpp b/src/audio_stream.cpp new file mode 100644 index 0000000..975908f --- /dev/null +++ b/src/audio_stream.cpp @@ -0,0 +1,254 @@ +/* + * Copyright 2025 LiveKit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an “AS IS” BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "livekit/audio_stream.h" + +#include + +#include "audio_frame.pb.h" +#include "ffi.pb.h" +#include "livekit/ffi_client.h" +#include "livekit/track.h" + +namespace livekit { + +using proto::FfiEvent; +using proto::FfiRequest; + +// ------------------------ +// Factory helpers +// ------------------------ + +std::unique_ptr +AudioStream::from_track(const std::shared_ptr &track, + const Options &options) { + auto stream = std::unique_ptr(new AudioStream()); + stream->init_from_track(track, options); + return stream; +} + +std::unique_ptr +AudioStream::from_participant(Participant &participant, + TrackSource track_source, + const Options &options) { + auto stream = std::unique_ptr(new AudioStream()); + stream->init_from_participant(participant, track_source, options); + return stream; +} + +// ------------------------ +// Destructor / move +// ------------------------ + +AudioStream::~AudioStream() { close(); } + +AudioStream::AudioStream(AudioStream &&other) noexcept { + std::lock_guard lock(other.mutex_); + queue_ = std::move(other.queue_); + capacity_ = other.capacity_; + eof_ = other.eof_; + closed_ = other.closed_; + options_ = other.options_; + stream_handle_ = std::move(other.stream_handle_); + listener_id_ = other.listener_id_; + + other.listener_id_ = 0; + other.closed_ = true; +} + +AudioStream &AudioStream::operator=(AudioStream &&other) noexcept { + if (this == &other) { + return *this; + } + + close(); + + { + std::lock_guard lock_this(mutex_); + std::lock_guard lock_other(other.mutex_); + + queue_ = std::move(other.queue_); + capacity_ = other.capacity_; + eof_ = other.eof_; + closed_ = other.closed_; + options_ = other.options_; + stream_handle_ = std::move(other.stream_handle_); + listener_id_ = other.listener_id_; + + other.listener_id_ = 0; + other.closed_ = true; + } + + return *this; +} + +bool AudioStream::read(AudioFrameEvent &out_event) { + std::unique_lock lock(mutex_); + + cv_.wait(lock, [this] { return !queue_.empty() || eof_ || closed_; }); + + if (closed_ || (queue_.empty() && eof_)) { + return false; // EOS / closed + } + + out_event = std::move(queue_.front()); + queue_.pop_front(); + return true; +} + +void AudioStream::close() { + { + std::lock_guard lock(mutex_); + if (closed_) { + return; + } + closed_ = true; + } + + // Dispose FFI handle + if (stream_handle_.get() != 0) { + stream_handle_.reset(); + } + + // Remove listener + if (listener_id_ != 0) { + FfiClient::instance().RemoveListener(listener_id_); + listener_id_ = 0; + } + + // Wake any waiting readers + cv_.notify_all(); +} + +// Internal functions + +void AudioStream::init_from_track(const std::shared_ptr &track, + const Options &options) { + capacity_ = options.capacity; + options_ = options; + + // 1) Subscribe to FFI events + listener_id_ = FfiClient::instance().AddListener( + [this](const FfiEvent &e) { this->on_ffi_event(e); }); + + // 2) Send FfiRequest to create a new audio stream bound to this track + FfiRequest req; + auto *new_audio_stream = req.mutable_new_audio_stream(); + new_audio_stream->set_track_handle( + static_cast(track->ffi_handle_id())); + new_audio_stream->set_sample_rate(options_.sample_rate); + new_audio_stream->set_num_channels(options.num_channels); + new_audio_stream->set_type(proto::AudioStreamType::AUDIO_STREAM_NATIVE); + + if (!options_.noise_cancellation_module.empty()) { + new_audio_stream->set_audio_filter_module_id( + options_.noise_cancellation_module); + // Always set options JSON even if empty — backend will treat empty string + // as “no 
options” + new_audio_stream->set_audio_filter_options( + options_.noise_cancellation_options_json); + } + + auto resp = FfiClient::instance().sendRequest(req); + const auto &stream = resp.new_audio_stream().stream(); + stream_handle_ = FfiHandle(static_cast(stream.handle().id())); +} + +void AudioStream::init_from_participant(Participant &participant, + TrackSource track_source, + const Options &options) { + capacity_ = options.capacity; + options_ = options; + + // 1) Subscribe to FFI events + listener_id_ = FfiClient::instance().AddListener( + [this](const FfiEvent &e) { this->on_ffi_event(e); }); + + // 2) Send FfiRequest to create audio stream from participant + track source + FfiRequest req; + auto *as = req.mutable_audio_stream_from_participant(); + as->set_participant_handle(participant.ffiHandleId()); + as->set_sample_rate(options_.sample_rate); + as->set_num_channels(options_.num_channels); + as->set_type(proto::AudioStreamType::AUDIO_STREAM_NATIVE); + as->set_track_source(static_cast(track_source)); + + if (!options_.noise_cancellation_module.empty()) { + as->set_audio_filter_module_id(options_.noise_cancellation_module); + // Always set options JSON even if empty — backend will treat empty string + // as “no options” + as->set_audio_filter_options(options_.noise_cancellation_options_json); + } + + auto resp = FfiClient::instance().sendRequest(req); + const auto &stream = resp.audio_stream_from_participant().stream(); + stream_handle_ = FfiHandle(static_cast(stream.handle().id())); +} + +void AudioStream::on_ffi_event(const FfiEvent &event) { + if (event.message_case() != FfiEvent::kAudioStreamEvent) { + return; + } + + const auto &ase = event.audio_stream_event(); + // Check if this event is for our stream handle. + if (ase.stream_handle() != static_cast(stream_handle_.get())) { + return; + } + if (ase.has_frame_received()) { + const auto &fr = ase.frame_received(); + + // Convert owned buffer -> AudioFrame via helper. + // Implement AudioFrame::fromOwnedInfo(...) to mirror Python's + // AudioFrame._from_owned_info. + AudioFrame frame = AudioFrame::fromOwnedInfo(fr.frame()); + AudioFrameEvent ev{std::move(frame)}; + push_frame(std::move(ev)); + } else if (ase.has_eos()) { + push_eos(); + } +} + +void AudioStream::push_frame(AudioFrameEvent &&ev) { + { + std::lock_guard lock(mutex_); + + if (closed_ || eof_) { + return; + } + + if (capacity_ > 0 && queue_.size() >= capacity_) { + // Ring behavior: drop oldest frame when full. + queue_.pop_front(); + } + + queue_.push_back(std::move(ev)); + } + cv_.notify_one(); +} + +void AudioStream::push_eos() { + { + std::lock_guard lock(mutex_); + if (eof_) { + return; + } + eof_ = true; + } + cv_.notify_all(); +} + +} // namespace livekit diff --git a/src/video_frame.cpp b/src/video_frame.cpp index badce04..a56e7f2 100644 --- a/src/video_frame.cpp +++ b/src/video_frame.cpp @@ -6,6 +6,7 @@ #include #include +#include "livekit/ffi_handle.h" #include "video_utils.h" namespace livekit { @@ -271,37 +272,7 @@ LKVideoFrame::LKVideoFrame(int width, int height, VideoBufferType type, throw std::invalid_argument("LKVideoFrame: provided data is too small for " "the specified format and size"); } - std::cout << "width_ is " << width_ << std::endl; - std::cout << "height_ is " << height_ << std::endl; } -/* -LKVideoFrame& LKVideoFrame::operator=(LKVideoFrame&& other) noexcept { - // 1. Self-assignment check - if (this == &other) { - return *this; - } - - // 2. 
Resource cleanup (The std::vector handles its own memory cleanup, - // but the simple members must be transferred.) - - // 3. Transfer simple members (width, height, type) - width_ = other.width_; - height_ = other.height_; - type_ = other.type_; - - // 4. Transfer complex resource (std::vector) - // std::move() is used to invoke std::vector's move assignment operator - data_ = std::move(other.data_); - - // 5. Optional: Reset the 'other' object to a valid but empty/default state. - // This is good practice for the object that has been moved-from. - other.width_ = 0; - other.height_ = 0; - // other.data_ is already empty after the move assignment - - // 6. Return reference to the assigned object - return *this; -}*/ LKVideoFrame LKVideoFrame::create(int width, int height, VideoBufferType type) { const std::size_t size = computeBufferSize(width, height, type); @@ -335,4 +306,58 @@ LKVideoFrame LKVideoFrame::convert(VideoBufferType dst, bool flip_y) const { return convertViaFfi(*this, dst, flip_y); } +LKVideoFrame LKVideoFrame::fromOwnedInfo(const proto::OwnedVideoBuffer &owned) { + const auto &info = owned.info(); + const int width = static_cast(info.width()); + const int height = static_cast(info.height()); + // Assuming your C++ enum matches proto's underlying values. + const VideoBufferType type = static_cast(info.type()); + + std::vector buffer; + + if (info.components_size() > 0) { + // Multi-plane (e.g. I420, NV12, etc.). We pack planes back-to-back. + std::size_t total_size = 0; + for (const auto &comp : info.components()) { + total_size += static_cast(comp.size()); + } + + buffer.resize(total_size); + std::size_t offset = 0; + for (const auto &comp : info.components()) { + const auto sz = static_cast(comp.size()); + const auto src_ptr = reinterpret_cast( + static_cast(comp.data_ptr())); + + std::memcpy(buffer.data() + offset, src_ptr, sz); + offset += sz; + } + } else { + // Packed format: treat top-level data_ptr as a single contiguous buffer. + const auto src_ptr = reinterpret_cast( + static_cast(info.data_ptr())); + + std::size_t total_size = 0; + if (info.has_stride()) { + // Use stride * height as total size (includes per-row padding if any). + total_size = static_cast(info.stride()) * + static_cast(height); + } else { + // Use our generic buffer-size helper (width/height/type). + total_size = computeBufferSize(width, height, type); + } + + buffer.resize(total_size); + std::memcpy(buffer.data(), src_ptr, total_size); + } + + // Release the FFI-owned buffer after copying the data. + { + FfiHandle owned_handle(static_cast(owned.handle().id())); + // owned_handle destroyed at end of scope → native buffer disposed. 
+ } + + return LKVideoFrame(width, height, type, std::move(buffer)); +} + } // namespace livekit diff --git a/src/video_stream.cpp b/src/video_stream.cpp index 42f28d9..19b1903 100644 --- a/src/video_stream.cpp +++ b/src/video_stream.cpp @@ -1,11 +1,12 @@ -#include "video_stream.h" +#include "livekit/video_stream.h" #include -#include "ffi_client.h" -// Include your actual generated proto headers here: -#include "proto/ffi_rpc.pb.h" -#include "proto/video_frame.pb.h" +#include "ffi.pb.h" +#include "livekit/ffi_client.h" +#include "livekit/track.h" +#include "video_frame.pb.h" +#include "video_utils.h" namespace livekit { @@ -86,19 +87,17 @@ void VideoStream::initFromTrack(const std::shared_ptr &track, // 1) Subscribe to FFI events listener_id_ = FfiClient::instance().AddListener( - [this](const FfiEvent &e) { this->onFfiEvent(e); }); + [this](const proto::FfiEvent &e) { this->onFfiEvent(e); }); // 2) Send FFI request to create a new video stream bound to this track FfiRequest req; auto *new_video_stream = req.mutable_new_video_stream(); - new_video_stream->set_track_handle(track->ffiHandleId()); + new_video_stream->set_track_handle(track->ffi_handle_id()); new_video_stream->set_type(proto::VideoStreamType::VIDEO_STREAM_NATIVE); new_video_stream->set_normalize_stride(true); - if (options.format.has_value()) { - new_video_stream->set_format(static_cast(*options.format)); - } + new_video_stream->set_format(toProto(options.format)); - auto resp = FfiClient::instance().request(req); + auto resp = FfiClient::instance().sendRequest(req); // Adjust field names to match your proto exactly: const auto &stream = resp.new_video_stream().stream(); stream_handle_ = FfiHandle(static_cast(stream.handle().id())); @@ -120,13 +119,11 @@ void VideoStream::initFromParticipant(Participant &participant, auto *vs = req.mutable_video_stream_from_participant(); vs->set_participant_handle(participant.ffiHandleId()); vs->set_type(proto::VideoStreamType::VIDEO_STREAM_NATIVE); - vs->set_track_source(static_cast(track_source)); + vs->set_track_source(static_cast(track_source)); vs->set_normalize_stride(true); - if (options.format.has_value()) { - vs->set_format(static_cast(*options.format)); - } + vs->set_format(toProto(options.format)); - auto resp = FfiClient::instance().request(req); + auto resp = FfiClient::instance().sendRequest(req); // Adjust field names to match your proto exactly: const auto &stream = resp.video_stream_from_participant().stream(); stream_handle_ = FfiHandle(static_cast(stream.handle().id())); @@ -161,7 +158,7 @@ void VideoStream::close() { // Dispose FFI handle if (stream_handle_.get() != 0) { - stream_handle_.dispose(); + stream_handle_.reset(); } // Remove listener @@ -178,27 +175,23 @@ void VideoStream::close() { // Internal helpers // ------------------------ -void VideoStream::onFfiEvent(const FfiEvent &event) { +void VideoStream::onFfiEvent(const proto::FfiEvent &event) { // Filter for video_stream_event first. if (event.message_case() != FfiEvent::kVideoStreamEvent) { return; } - const auto &vse = event.video_stream_event(); - // Check if this event is for our stream handle. - if (static_cast(vse.stream_handle().id()) != - stream_handle_.get()) { + if (vse.stream_handle() != static_cast(stream_handle_.get())) { return; } - // Handle frame_received or eos. if (vse.has_frame_received()) { const auto &fr = vse.frame_received(); // Convert owned buffer->VideoFrame via a helper. // You should implement this static function in your VideoFrame class. 
- VideoFrame frame = VideoFrame::fromOwnedInfo(fr.buffer()); + LKVideoFrame frame = LKVideoFrame::fromOwnedInfo(fr.buffer()); VideoFrameEvent ev{std::move(frame), fr.timestamp_us(), static_cast(fr.rotation())}; diff --git a/src/video_utils.cpp b/src/video_utils.cpp index be1470f..4f4c5c0 100644 --- a/src/video_utils.cpp +++ b/src/video_utils.cpp @@ -11,10 +11,7 @@ namespace livekit { -namespace { - -// Map SDK enum -> proto enum -proto::VideoBufferType toProtoBufferType(VideoBufferType t) { +proto::VideoBufferType toProto(VideoBufferType t) { switch (t) { case VideoBufferType::ARGB: return proto::VideoBufferType::ARGB; @@ -39,12 +36,12 @@ proto::VideoBufferType toProtoBufferType(VideoBufferType t) { case VideoBufferType::NV12: return proto::VideoBufferType::NV12; default: - throw std::runtime_error("Unknown VideoBufferType in toProtoBufferType"); + throw std::runtime_error("Unknown VideoBufferType in toProto"); } } // Map proto enum -> SDK enum -VideoBufferType fromProtoBufferType(proto::VideoBufferType t) { +VideoBufferType fromProto(proto::VideoBufferType t) { switch (t) { case proto::VideoBufferType::ARGB: return VideoBufferType::ARGB; @@ -69,13 +66,10 @@ VideoBufferType fromProtoBufferType(proto::VideoBufferType t) { case proto::VideoBufferType::NV12: return VideoBufferType::NV12; default: - throw std::runtime_error( - "Unknown proto::VideoBufferType in fromProtoBufferType"); + throw std::runtime_error("Unknown proto::VideoBufferType in fromProto"); } } -} // namespace - proto::VideoBufferInfo toProto(const LKVideoFrame &frame) { proto::VideoBufferInfo info; @@ -83,7 +77,7 @@ proto::VideoBufferInfo toProto(const LKVideoFrame &frame) { const int h = frame.height(); info.set_width(w); info.set_height(h); - info.set_type(toProtoBufferType(frame.type())); + info.set_type(toProto(frame.type())); // Backing data pointer for the whole buffer auto base_ptr = reinterpret_cast(frame.data()); @@ -123,7 +117,7 @@ LKVideoFrame fromOwnedProto(const proto::OwnedVideoBuffer &owned) { const int width = static_cast(info.width()); const int height = static_cast(info.height()); - const VideoBufferType type = fromProtoBufferType(info.type()); + const VideoBufferType type = fromProto(info.type()); // Allocate a new LKVideoFrame with the correct size/format LKVideoFrame frame = LKVideoFrame::create(width, height, type); @@ -154,7 +148,7 @@ LKVideoFrame convertViaFfi(const LKVideoFrame &frame, VideoBufferType dst, proto::FfiRequest req; auto *vc = req.mutable_video_convert(); vc->set_flip_y(flip_y); - vc->set_dst_type(toProtoBufferType(dst)); + vc->set_dst_type(toProto(dst)); vc->mutable_buffer()->CopyFrom(toProto(frame)); proto::FfiResponse resp = FfiClient::instance().sendRequest(req); diff --git a/src/video_utils.h b/src/video_utils.h index 5ece8f2..733e4a5 100644 --- a/src/video_utils.h +++ b/src/video_utils.h @@ -26,5 +26,7 @@ proto::VideoBufferInfo toProto(const LKVideoFrame &frame); LKVideoFrame fromOwnedProto(const proto::OwnedVideoBuffer &owned); LKVideoFrame convertViaFfi(const LKVideoFrame &frame, VideoBufferType dst, bool flip_y); +proto::VideoBufferType toProto(VideoBufferType t); +VideoBufferType fromProto(proto::VideoBufferType t); } // namespace livekit From 450c54390269647f1b987eaba164b9308da8f780 Mon Sep 17 00:00:00 2001 From: shijing xian Date: Sat, 29 Nov 2025 14:50:05 -0800 Subject: [PATCH 3/5] Implement the SDL media handling and hook up audio / video streams --- examples/CMakeLists.txt | 19 +- examples/cmake/sdl3.cmake | 14 + examples/simple_room/fallback_capture.cpp | 113 ++++ 
examples/simple_room/fallback_capture.h | 35 ++ examples/simple_room/main.cpp | 213 ++++---- examples/simple_room/sdl_media.cpp | 227 ++++++++ examples/simple_room/sdl_media.h | 128 +++++ examples/simple_room/sdl_media_manager.cpp | 557 ++++++++++++++++++++ examples/simple_room/sdl_media_manager.h | 109 ++++ examples/simple_room/sdl_video_renderer.cpp | 165 ++++++ examples/simple_room/sdl_video_renderer.h | 52 ++ include/livekit/audio_frame.h | 1 + include/livekit/audio_stream.h | 73 ++- include/livekit/ffi_client.h | 4 +- include/livekit/livekit.h | 5 +- include/livekit/local_participant.h | 3 +- include/livekit/participant.h | 1 - include/livekit/remote_audio_track.h | 6 +- include/livekit/remote_video_track.h | 6 +- include/livekit/room.h | 134 ++++- include/livekit/room_delegate.h | 11 +- include/livekit/stats.h | 4 - include/livekit/track.h | 3 +- include/livekit/track_publication.h | 7 +- include/livekit/video_frame.h | 6 +- include/livekit/video_stream.h | 48 +- src/audio_frame.cpp | 3 + src/audio_stream.cpp | 58 +- src/ffi_client.cpp | 57 +- src/remote_audio_track.cpp | 19 +- src/remote_video_track.cpp | 19 +- src/room.cpp | 66 ++- src/room_event_converter.cpp | 24 - src/room_event_converter.h | 1 - src/room_proto_converter.cpp | 12 - src/room_proto_converter.h | 1 - src/video_frame.cpp | 6 +- src/video_stream.cpp | 125 +++-- 38 files changed, 1996 insertions(+), 339 deletions(-) create mode 100644 examples/cmake/sdl3.cmake create mode 100644 examples/simple_room/fallback_capture.cpp create mode 100644 examples/simple_room/fallback_capture.h create mode 100644 examples/simple_room/sdl_media.cpp create mode 100644 examples/simple_room/sdl_media.h create mode 100644 examples/simple_room/sdl_media_manager.cpp create mode 100644 examples/simple_room/sdl_media_manager.h create mode 100644 examples/simple_room/sdl_video_renderer.cpp create mode 100644 examples/simple_room/sdl_video_renderer.h diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index c8d68a4..ef7076e 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -1,16 +1,31 @@ cmake_minimum_required(VERSION 3.31.0) project (livekit-examples) +list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake") +include(sdl3) + add_executable(SimpleRoom simple_room/main.cpp + simple_room/fallback_capture.cpp + simple_room/fallback_capture.h + simple_room/sdl_media.cpp + simple_room/sdl_media.h + simple_room/sdl_media_manager.cpp + simple_room/sdl_media_manager.h + simple_room/sdl_video_renderer.cpp + simple_room/sdl_video_renderer.h simple_room/wav_audio_source.cpp simple_room/wav_audio_source.h ) -target_link_libraries(SimpleRoom livekit) +target_link_libraries(SimpleRoom + PRIVATE + livekit + SDL3::SDL3 +) add_custom_command(TARGET SimpleRoom POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_SOURCE_DIR}/data ${CMAKE_CURRENT_BINARY_DIR}/data -) \ No newline at end of file +) diff --git a/examples/cmake/sdl3.cmake b/examples/cmake/sdl3.cmake new file mode 100644 index 0000000..8a1899e --- /dev/null +++ b/examples/cmake/sdl3.cmake @@ -0,0 +1,14 @@ +# cmake/sdl3.cmake +include(FetchContent) + +# Only fetch/build SDL3 once, even if this file is included multiple times +if (NOT TARGET SDL3::SDL3) + FetchContent_Declare( + SDL3 + GIT_REPOSITORY https://github.com/libsdl-org/SDL.git + GIT_TAG release-3.2.26 + ) + + FetchContent_MakeAvailable(SDL3) +endif() + diff --git a/examples/simple_room/fallback_capture.cpp b/examples/simple_room/fallback_capture.cpp new file mode 100644 index 0000000..158b81c 
--- /dev/null +++ b/examples/simple_room/fallback_capture.cpp @@ -0,0 +1,113 @@ +/* + * Copyright 2025 LiveKit, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "fallback_capture.h" + +#include "livekit/livekit.h" +#include "wav_audio_source.h" + +using namespace livekit; + +// Test utils to run a capture loop to publish noisy audio frames to the room +void runNoiseCaptureLoop(const std::shared_ptr &source, + std::atomic &running_flag) { + const int sample_rate = source->sample_rate(); + const int num_channels = source->num_channels(); + const int frame_ms = 10; + const int samples_per_channel = sample_rate * frame_ms / 1000; + + // FIX: variable name should not shadow the type + WavAudioSource wavSource("data/welcome.wav", 48000, 1, false); + + using Clock = std::chrono::steady_clock; + auto next_deadline = Clock::now(); + while (running_flag.load(std::memory_order_relaxed)) { + AudioFrame frame = + AudioFrame::create(sample_rate, num_channels, samples_per_channel); + wavSource.fillFrame(frame); + try { + source->captureFrame(frame); + } catch (const std::exception &e) { + std::cerr << "Error in captureFrame (noise): " << e.what() << std::endl; + break; + } + + // Pace the loop to roughly real-time + next_deadline += std::chrono::milliseconds(frame_ms); + std::this_thread::sleep_until(next_deadline); + } + + try { + source->clearQueue(); + } catch (...) { + std::cout << "Error in clearQueue (noise)" << std::endl; + } +} + +// Fake video source: solid color cycling +void runFakeVideoCaptureLoop(const std::shared_ptr &source, + std::atomic &running_flag) { + auto frame = LKVideoFrame::create(1280, 720, VideoBufferType::BGRA); + const double framerate = 1.0 / 30.0; + + while (running_flag.load(std::memory_order_relaxed)) { + static auto start = std::chrono::high_resolution_clock::now(); + float t = std::chrono::duration( + std::chrono::high_resolution_clock::now() - start) + .count(); + // Cycle every 4 seconds: 0=red, 1=green, 2=blue, 3=black + int stage = static_cast(t) % 4; + + std::array rgb{}; + switch (stage) { + case 0: // red + rgb = {255, 0, 0, 0}; + break; + case 1: // green + rgb = {0, 255, 0, 0}; + break; + case 2: // blue + rgb = {0, 0, 255, 0}; + break; + case 3: // black + default: + rgb = {0, 0, 0, 0}; + break; + } + + // ARGB + uint8_t *data = frame.data(); + const size_t size = frame.dataSize(); + for (size_t i = 0; i < size; i += 4) { + data[i + 0] = 255; // A + data[i + 1] = rgb[0]; // R + data[i + 2] = rgb[1]; // G + data[i + 3] = rgb[2]; // B + } + + try { + // If VideoSource is ARGB-capable, pass frame. + // If it expects I420, pass i420 instead. 
+ source->captureFrame(frame, 0, VideoRotation::VIDEO_ROTATION_0); + } catch (const std::exception &e) { + std::cerr << "Error in captureFrame (fake video): " << e.what() + << std::endl; + break; + } + + std::this_thread::sleep_for(std::chrono::duration(framerate)); + } +} diff --git a/examples/simple_room/fallback_capture.h b/examples/simple_room/fallback_capture.h new file mode 100644 index 0000000..a7d8536 --- /dev/null +++ b/examples/simple_room/fallback_capture.h @@ -0,0 +1,35 @@ +/* + * Copyright 2025 LiveKit, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +// Assuming you already have this somewhere: +extern std::atomic g_running; + +namespace livekit { +class AudioSource; +class VideoSource; +} // namespace livekit + +void runNoiseCaptureLoop(const std::shared_ptr &source, + std::atomic &running_flag); + +void runFakeVideoCaptureLoop( + const std::shared_ptr &source, + std::atomic &running_flag); diff --git a/examples/simple_room/main.cpp b/examples/simple_room/main.cpp index 85221c4..dd23091 100644 --- a/examples/simple_room/main.cpp +++ b/examples/simple_room/main.cpp @@ -1,3 +1,19 @@ +/* + * Copyright 2025 LiveKit, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + #include #include #include @@ -9,6 +25,7 @@ #include #include "livekit/livekit.h" +#include "sdl_media_manager.h" #include "wav_audio_source.h" // TODO(shijing), remove this livekit_ffi.h as it should be internal only. 
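The fallback loops above are meant to run on worker threads gated by a shared shutdown flag. A minimal sketch of how they can be driven, assuming the AudioSource/VideoSource objects are created as in main.cpp; runFallbackLoops and its parameters are illustrative names:

#include <atomic>
#include <memory>
#include <thread>

#include "fallback_capture.h"
#include "livekit/livekit.h"

// In the example this flag lives in main.cpp; it is defined here only to keep
// the sketch self-contained.
std::atomic<bool> g_running{true};

// Spawns both fallback loops and joins them once g_running is cleared
// (for example from a SIGINT handler).
void runFallbackLoops(const std::shared_ptr<livekit::AudioSource> &audio,
                      const std::shared_ptr<livekit::VideoSource> &video) {
  std::thread audio_thread(runNoiseCaptureLoop, audio, std::ref(g_running));
  std::thread video_thread(runFakeVideoCaptureLoop, video, std::ref(g_running));

  // ... publish tracks, wait for shutdown ...

  g_running.store(false, std::memory_order_relaxed);
  audio_thread.join();
  video_thread.join();
}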
@@ -103,8 +120,37 @@ bool parse_args(int argc, char *argv[], std::string &url, std::string &token) { return !(url.empty() || token.empty()); } +class MainThreadDispatcher { +public: + static void dispatch(std::function<void()> fn) { + std::lock_guard lock(mutex_); + queue_.push(std::move(fn)); + } + + static void update() { + std::queue<std::function<void()>> local; + + { + std::lock_guard lock(mutex_); + std::swap(local, queue_); + } + + // Run everything on main thread + while (!local.empty()) { + local.front()(); + local.pop(); + } + } + +private: + static inline std::mutex mutex_; + static inline std::queue<std::function<void()>> queue_; +}; + class SimpleRoomDelegate : public livekit::RoomDelegate { public: + explicit SimpleRoomDelegate(SDLMediaManager &media) : media_(media) {} + void onParticipantConnected( livekit::Room & /*room*/, const livekit::ParticipantConnectedEvent &ev) override { @@ -114,101 +160,54 @@ class SimpleRoomDelegate : public livekit::RoomDelegate { void onTrackSubscribed(livekit::Room & /*room*/, const livekit::TrackSubscribedEvent &ev) override { + const char *participant_identity = + ev.participant ? ev.participant->identity().c_str() : ""; + const std::string track_sid = + ev.publication ? ev.publication->sid() : ""; + const std::string track_name = + ev.publication ? ev.publication->name() : ""; std::cout << "[Room] track subscribed: participant_identity=" - << ev.participant_identity << " track_sid=" << ev.track_sid - << " name=" << ev.track_name << "\n"; - // TODO(shijing): when you expose Track kind/source here, you can check - // whether this is a video track and start a VideoStream-like consumer. Use - // the python code as reference. - } -}; - -// Test utils to run a capture loop to publish noisy audio frames to the room -void runNoiseCaptureLoop(const std::shared_ptr<AudioSource> &source) { - const int sample_rate = source->sample_rate(); - const int num_channels = source->num_channels(); - const int frame_ms = 10; - const int samples_per_channel = sample_rate * frame_ms / 1000; - - WavAudioSource WavAudioSource("data/welcome.wav", 48000, 1, false); - using Clock = std::chrono::steady_clock; - auto next_deadline = Clock::now(); - while (g_running.load(std::memory_order_relaxed)) { - AudioFrame frame = - AudioFrame::create(sample_rate, num_channels, samples_per_channel); - WavAudioSource.fillFrame(frame); - try { - source->captureFrame(frame); - } catch (const std::exception &e) { - // If something goes wrong, log and break out - std::cerr << "Error in captureFrame: " << e.what() << std::endl; - break; + << participant_identity << " track_sid=" << track_sid + << " name=" << track_name; + if (ev.track) { + std::cout << " kind=" << static_cast<int>(ev.track->kind()) << "\n"; + } + if (ev.publication) { + std::cout << " source=" << static_cast<int>(ev.publication->source()) + << "\n"; + } - // Pace the loop to roughly real-time - next_deadline += std::chrono::milliseconds(frame_ms); - std::this_thread::sleep_until(next_deadline); - } - - // Optionally clear queued audio on exit - try { - source->clearQueue(); - } catch (...)
{ - // ignore errors on shutdown - std::cout << "Error in clearQueue" << std::endl; - } -} + // If this is a VIDEO track, create a VideoStream and attach to renderer + if (ev.track && ev.track->kind() == TrackKind::KIND_VIDEO) { + VideoStream::Options opts; + opts.format = livekit::VideoBufferType::RGBA; + auto video_stream = VideoStream::fromTrack(ev.track, opts); + std::cout << "after fromTrack " << std::endl; + if (!video_stream) { + std::cerr << "Failed to create VideoStream for track " << track_sid + << "\n"; + return; + } -void runFakeVideoCaptureLoop(const std::shared_ptr &source) { - auto frame = LKVideoFrame::create(1280, 720, VideoBufferType::ARGB); - double framerate = 1.0 / 30; - while (g_running.load(std::memory_order_relaxed)) { - static auto start = std::chrono::high_resolution_clock::now(); - float t = std::chrono::duration( - std::chrono::high_resolution_clock::now() - start) - .count(); - // Cycle every 4 seconds: 0=red, 1=green, 2=blue, 3 black - int stage = static_cast(t) % 4; - std::vector rgb(4); - switch (stage) { - case 0: // red - rgb[0] = 255; - rgb[1] = 0; - rgb[2] = 0; - break; - case 1: // green - rgb[0] = 0; - rgb[1] = 255; - rgb[2] = 0; - break; - case 2: // blue - rgb[0] = 0; - rgb[1] = 0; - rgb[2] = 255; - break; - case 4: // black - rgb[0] = 0; - rgb[1] = 0; - rgb[2] = 0; + MainThreadDispatcher::dispatch([this, video_stream] { + if (!media_.initRenderer(video_stream)) { + std::cerr << "SDLMediaManager::startRenderer failed for track\n"; + } + }); + } else if (ev.track && ev.track->kind() == TrackKind::KIND_AUDIO) { + AudioStream::Options opts; + auto audio_stream = AudioStream::fromTrack(ev.track, opts); + MainThreadDispatcher::dispatch([this, audio_stream] { + if (!media_.startSpeaker(audio_stream)) { + std::cerr << "SDLMediaManager::startRenderer failed for track\n"; + } + }); } - for (size_t i = 0; i < frame.dataSize(); i += 4) { - frame.data()[i] = 255; - frame.data()[i + 1] = rgb[0]; - frame.data()[i + 2] = rgb[1]; - frame.data()[i + 3] = rgb[2]; - } - LKVideoFrame i420 = convertViaFfi(frame, VideoBufferType::I420, false); - try { - source->captureFrame(frame, 0, VideoRotation::VIDEO_ROTATION_0); - } catch (const std::exception &e) { - // If something goes wrong, log and break out - std::cerr << "Error in captureFrame: " << e.what() << std::endl; - break; - } - - std::this_thread::sleep_for(std::chrono::duration(framerate)); } -} + +private: + SDLMediaManager &media_; +}; } // namespace @@ -225,16 +224,28 @@ int main(int argc, char *argv[]) { return 1; } + if (!SDL_Init(SDL_INIT_VIDEO)) { + std::cerr << "SDL_Init(SDL_INIT_VIDEO) failed: " << SDL_GetError() << "\n"; + // You can choose to exit, or run in "headless" mode without renderer. 
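MainThreadDispatcher above marshals work from SDK callback threads (where onTrackSubscribed runs) back onto the main loop, since SDL windows and renderers must be driven from the main thread. A usage sketch under those assumptions; handleVideoStream is an illustrative name:

// Called from a delegate callback on a non-main thread.
void handleVideoStream(SDLMediaManager &media,
                       const std::shared_ptr<livekit::VideoStream> &stream) {
  MainThreadDispatcher::dispatch([&media, stream] {
    // Runs later, on the main thread, when update() drains the queue.
    media.initRenderer(stream);
  });
}

// Main loop on the main thread, as in main() below:
//   while (g_running.load()) {
//     MainThreadDispatcher::update();  // run queued work
//     media.render();                  // draw the latest remote frame
//     std::this_thread::sleep_for(std::chrono::milliseconds(10));
//   }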
+ // return 1; + } + + // Setup media; + SDLMediaManager media; + std::cout << "Connecting to: " << url << std::endl; // Handle Ctrl-C to exit the idle loop std::signal(SIGINT, handle_sigint); livekit::Room room{}; - SimpleRoomDelegate delegate; + SimpleRoomDelegate delegate(media); room.setDelegate(&delegate); - bool res = room.Connect(url, token); + RoomOptions options; + options.auto_subscribe = true; + options.dynacast = false; + bool res = room.Connect(url, token, options); std::cout << "Connect result is " << std::boolalpha << res << std::endl; if (!res) { std::cerr << "Failed to connect to room\n"; @@ -287,9 +298,7 @@ int main(int argc, char *argv[]) { std::cerr << "Failed to publish track: " << e.what() << std::endl; } - // TODO, if we have pre-buffering feature, we might consider starting the - // thread right after creating the source. - std::thread audioThread(runNoiseCaptureLoop, audioSource); + media.startMic(audioSource); // Setup Video Source / Track auto videoSource = std::make_shared(1280, 720); @@ -316,24 +325,24 @@ int main(int argc, char *argv[]) { } catch (const std::exception &e) { std::cerr << "Failed to publish track: " << e.what() << std::endl; } - std::thread videoThread(runFakeVideoCaptureLoop, videoSource); + media.startCamera(videoSource); // Keep the app alive until Ctrl-C so we continue receiving events, // similar to asyncio.run(main()) keeping the loop running. while (g_running.load()) { - std::this_thread::sleep_for(std::chrono::milliseconds(100)); + MainThreadDispatcher::update(); + media.render(); + std::this_thread::sleep_for(std::chrono::milliseconds(10)); } // Shutdown the audio thread. - if (audioThread.joinable()) { - audioThread.join(); - } + media.stopMic(); + // Clean up the audio track publishment room.local_participant()->unpublishTrack(audioPub->sid()); - if (videoThread.joinable()) { - videoThread.join(); - } + media.stopCamera(); + // Clean up the video track publishment room.local_participant()->unpublishTrack(videoPub->sid()); diff --git a/examples/simple_room/sdl_media.cpp b/examples/simple_room/sdl_media.cpp new file mode 100644 index 0000000..4961f51 --- /dev/null +++ b/examples/simple_room/sdl_media.cpp @@ -0,0 +1,227 @@ +/* + * Copyright 2025 LiveKit, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "sdl_media.h" + +#include + +// ---------------------- SDLMicSource ----------------------------- + +SDLMicSource::SDLMicSource(int sample_rate, int channels, int frame_samples, + AudioCallback cb) + : sample_rate_(sample_rate), channels_(channels), + frame_samples_(frame_samples), callback_(std::move(cb)) {} + +SDLMicSource::~SDLMicSource() { + if (stream_) { + SDL_DestroyAudioStream(stream_); + stream_ = nullptr; + } +} + +bool SDLMicSource::init() { + // desired output (what SDL will give us when we call SDL_GetAudioStreamData) + SDL_zero(spec_); + spec_.format = SDL_AUDIO_S16; // 16-bit signed + spec_.channels = static_cast(channels_); + spec_.freq = sample_rate_; + + // Open default recording device as an audio stream + // This works for both playback and recording, depending on the device id. + stream_ = SDL_OpenAudioDeviceStream( + SDL_AUDIO_DEVICE_DEFAULT_RECORDING, // recording device + &spec_, + nullptr, // no callback, we'll poll + nullptr); + + if (!stream_) { + std::cerr << "Failed to open recording stream: " << SDL_GetError() << "\n"; + return false; + } + + if (!SDL_ResumeAudioStreamDevice(stream_)) { // unpause device + std::cerr << "Failed to resume recording device: " << SDL_GetError() + << "\n"; + return false; + } + + return true; +} + +void SDLMicSource::pump() { + if (!stream_ || !callback_) + return; + + const int samples_per_frame_total = frame_samples_ * channels_; + const int bytes_per_frame = samples_per_frame_total * sizeof(int16_t); + + // Only pull if at least one "frame" worth of audio is available + const int available = SDL_GetAudioStreamAvailable(stream_); // bytes + if (available < bytes_per_frame) { + return; + } + + std::vector buffer(samples_per_frame_total); + + const int got_bytes = SDL_GetAudioStreamData(stream_, buffer.data(), + bytes_per_frame); // + + if (got_bytes <= 0) { + return; // nothing or error (log if you like) + } + + const int got_samples_total = got_bytes / sizeof(int16_t); + const int got_samples_per_channel = got_samples_total / channels_; + + callback_(buffer.data(), got_samples_per_channel, sample_rate_, channels_); +} + +void SDLMicSource::pause() { + if (stream_) { + SDL_PauseAudioStreamDevice(stream_); // + } +} + +void SDLMicSource::resume() { + if (stream_) { + SDL_ResumeAudioStreamDevice(stream_); // + } +} + +// ---------------------- DDLSpeakerSink ----------------------------- + +DDLSpeakerSink::DDLSpeakerSink(int sample_rate, int channels) + : sample_rate_(sample_rate), channels_(channels) {} + +DDLSpeakerSink::~DDLSpeakerSink() { + if (stream_) { + SDL_DestroyAudioStream(stream_); // also closes device + stream_ = nullptr; + } +} + +bool DDLSpeakerSink::init() { + SDL_zero(spec_); + spec_.format = SDL_AUDIO_S16; // expect S16 input for playback + spec_.channels = static_cast(channels_); + spec_.freq = sample_rate_; + + // Open default playback device as a stream. 
+ stream_ = SDL_OpenAudioDeviceStream(SDL_AUDIO_DEVICE_DEFAULT_PLAYBACK, &spec_, + nullptr, // no callback; we'll push data + nullptr); + + if (!stream_) { + std::cerr << "Failed to open playback stream: " << SDL_GetError() << "\n"; + return false; + } + + if (!SDL_ResumeAudioStreamDevice(stream_)) { + std::cerr << "Failed to resume playback device: " << SDL_GetError() << "\n"; + return false; + } + + return true; +} + +void DDLSpeakerSink::enqueue(const int16_t *samples, + int num_samples_per_channel) { + if (!stream_ || !samples) + return; + + const int totalSamples = num_samples_per_channel * channels_; + const int bytes = totalSamples * static_cast(sizeof(int16_t)); + + // SDL will resample / convert as needed on SDL_GetAudioStreamData() side. + if (!SDL_PutAudioStreamData(stream_, samples, bytes)) { + std::cerr << "SDL_PutAudioStreamData failed: " << SDL_GetError() << "\n"; + } +} + +void DDLSpeakerSink::pause() { + if (stream_) { + SDL_PauseAudioStreamDevice(stream_); + } +} + +void DDLSpeakerSink::resume() { + if (stream_) { + SDL_ResumeAudioStreamDevice(stream_); + } +} + +// ---------------------- SDLCamSource ----------------------------- + +SDLCamSource::SDLCamSource(int desired_width, int desired_height, + int desired_fps, SDL_PixelFormat pixel_format, + VideoCallback cb) + : width_(desired_width), height_(desired_height), fps_(desired_fps), + format_(pixel_format), callback_(std::move(cb)) {} + +SDLCamSource::~SDLCamSource() { + if (camera_) { + SDL_CloseCamera(camera_); // + camera_ = nullptr; + } +} + +bool SDLCamSource::init() { + int count = 0; + SDL_CameraID *cams = SDL_GetCameras(&count); // + if (!cams || count == 0) { + std::cerr << "No cameras available: " << SDL_GetError() << "\n"; + if (cams) + SDL_free(cams); + return false; + } + + SDL_CameraID camId = cams[0]; // first camera for now + SDL_free(cams); + + SDL_zero(spec_); + spec_.format = format_; + spec_.colorspace = SDL_COLORSPACE_SRGB; + spec_.width = width_; + spec_.height = height_; + spec_.framerate_numerator = fps_; + spec_.framerate_denominator = 1; + + camera_ = SDL_OpenCamera(camId, &spec_); + if (!camera_) { + std::cerr << "Failed to open camera: " << SDL_GetError() << "\n"; + return false; + } + + // On many platforms you must wait for SDL_EVENT_CAMERA_DEVICE_APPROVED; + // here we assume the app’s main loop is already handling that. + return true; +} + +void SDLCamSource::pump() { + if (!camera_ || !callback_) + return; + + Uint64 tsNS = 0; + SDL_Surface *surf = SDL_AcquireCameraFrame(camera_, &tsNS); // non-blocking + if (!surf) { + return; + } + + callback_(static_cast(surf->pixels), surf->pitch, surf->w, surf->h, + surf->format, tsNS); + + SDL_ReleaseCameraFrame(camera_, surf); // +} diff --git a/examples/simple_room/sdl_media.h b/examples/simple_room/sdl_media.h new file mode 100644 index 0000000..a60bca6 --- /dev/null +++ b/examples/simple_room/sdl_media.h @@ -0,0 +1,128 @@ +/* + * Copyright 2025 LiveKit, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include +#include + +// ------------------------- +// SDLMicSource +// ------------------------- +// Periodically call pump() from your main loop or a capture thread. +// It will pull 10ms frames from the mic (by default) and pass them to the +// AudioCallback. +class SDLMicSource { +public: + using AudioCallback = std::function; + + SDLMicSource(int sample_rate = 48000, int channels = 1, + int frame_samples = 480, AudioCallback cb = nullptr); + + ~SDLMicSource(); + + // Initialize SDL audio stream for recording + bool init(); + + // Call regularly to pull mic data and send to callback. + void pump(); + + void pause(); + void resume(); + + bool isValid() const { return stream_ != nullptr; } + +private: + SDL_AudioStream *stream_ = nullptr; + SDL_AudioSpec spec_{}; + int sample_rate_; + int channels_; + int frame_samples_; + AudioCallback callback_; +}; + +// ------------------------- +// DDLSpeakerSink +// ------------------------- +// For remote audio: when you get a decoded PCM frame, +// call enqueue() with interleaved S16 samples. +class DDLSpeakerSink { +public: + DDLSpeakerSink(int sample_rate = 48000, int channels = 1); + + ~DDLSpeakerSink(); + + bool init(); + + // Enqueue interleaved S16 samples for playback. + void enqueue(const int16_t *samples, int num_samples_per_channel); + + void pause(); + void resume(); + + bool isValid() const { return stream_ != nullptr; } + +private: + SDL_AudioStream *stream_ = nullptr; + SDL_AudioSpec spec_{}; + int sample_rate_; + int channels_; +}; + +// ------------------------- +// SDLCamSource +// ------------------------- +// Periodically call pump(); each time a new frame is available +// it will invoke the VideoCallback with the raw pixels. +// +// NOTE: pixels are in the SDL_Surface format returned by the camera +// (often SDL_PIXELFORMAT_ARGB8888). You can either: +// - convert to whatever your LiveKit video source expects, or +// - tell LiveKit that this is ARGB with the given stride. +class SDLCamSource { +public: + using VideoCallback = std::function; + + SDLCamSource(int desired_width = 1280, int desired_height = 720, + int desired_fps = 30, + SDL_PixelFormat pixelFormat = SDL_PIXELFORMAT_RGBA8888, + VideoCallback cb = nullptr); + + ~SDLCamSource(); + + bool init(); // open first available camera with (approximately) given spec + + // Call regularly; will call VideoCallback when a frame is available. + void pump(); + + bool isValid() const { return camera_ != nullptr; } + +private: + SDL_Camera *camera_ = nullptr; + SDL_CameraSpec spec_{}; + int width_; + int height_; + int fps_; + SDL_PixelFormat format_; + VideoCallback callback_; +}; diff --git a/examples/simple_room/sdl_media_manager.cpp b/examples/simple_room/sdl_media_manager.cpp new file mode 100644 index 0000000..58d3bd5 --- /dev/null +++ b/examples/simple_room/sdl_media_manager.cpp @@ -0,0 +1,557 @@ +/* + * Copyright 2025 LiveKit, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "sdl_media_manager.h" + +#include "fallback_capture.h" +#include "livekit/livekit.h" +#include "sdl_video_renderer.h" +#include +#include +#include + +// ---------------- SDLMicSource ---------------- + +class SDLMicSource { +public: + using AudioCallback = + std::function; + + SDLMicSource(int sample_rate, int channels, int frame_samples, + AudioCallback cb) + : sample_rate_(sample_rate), channels_(channels), + frame_samples_(frame_samples), callback_(std::move(cb)) {} + + ~SDLMicSource() { + if (stream_) { + SDL_DestroyAudioStream(stream_); + stream_ = nullptr; + } + } + + bool init() { + SDL_zero(spec_); + spec_.format = SDL_AUDIO_S16; + spec_.channels = static_cast(channels_); + spec_.freq = sample_rate_; + + stream_ = SDL_OpenAudioDeviceStream(SDL_AUDIO_DEVICE_DEFAULT_RECORDING, + &spec_, nullptr, nullptr); + + if (!stream_) { + std::cerr << "Failed to open recording stream: " << SDL_GetError() + << std::endl; + return false; + } + + if (!SDL_ResumeAudioStreamDevice(stream_)) { + std::cerr << "Failed to resume recording device: " << SDL_GetError() + << std::endl; + return false; + } + + return true; + } + + void pump() { + if (!stream_ || !callback_) + return; + + const int totalSamples = frame_samples_ * channels_; + const int bytes_per_frame = + totalSamples * static_cast(sizeof(int16_t)); + + const int available = SDL_GetAudioStreamAvailable(stream_); + if (available < bytes_per_frame) + return; + + std::vector buffer(totalSamples); + + const int got_bytes = + SDL_GetAudioStreamData(stream_, buffer.data(), bytes_per_frame); + if (got_bytes <= 0) + return; + + const int samplesTotal = got_bytes / static_cast(sizeof(int16_t)); + const int samplesPerChannel = samplesTotal / channels_; + + callback_(buffer.data(), samplesPerChannel, sample_rate_, channels_); + } + +private: + SDL_AudioStream *stream_ = nullptr; + SDL_AudioSpec spec_{}; + int sample_rate_; + int channels_; + int frame_samples_; + AudioCallback callback_; +}; + +// ---------------- SDLCamSource ---------------- + +class SDLCamSource { +public: + using VideoCallback = std::function; + + SDLCamSource(int desired_width, int desired_height, int desired_fps, + SDL_PixelFormat pixelFormat, VideoCallback cb) + : width_(desired_width), height_(desired_height), fps_(desired_fps), + format_(pixelFormat), callback_(std::move(cb)) {} + + ~SDLCamSource() { + if (camera_) { + SDL_CloseCamera(camera_); + camera_ = nullptr; + } + } + + bool init() { + int count = 0; + SDL_CameraID *cams = SDL_GetCameras(&count); + if (!cams || count == 0) { + std::cerr << "No camera devices found (SDL): " << SDL_GetError() + << std::endl; + if (cams) + SDL_free(cams); + return false; + } + + SDL_CameraID camId = cams[0]; + SDL_free(cams); + + SDL_zero(spec_); + spec_.format = format_; + spec_.width = width_; + spec_.height = height_; + spec_.framerate_numerator = fps_; + spec_.framerate_denominator = 1; + + camera_ = SDL_OpenCamera(camId, &spec_); + if (!camera_) { + std::cerr << "Failed to open camera: " << SDL_GetError() << std::endl; + return false; + } + + return true; + } + + void pump() { + if (!camera_ || !callback_) + return; + + Uint64 tsNS = 0; + SDL_Surface *surf = SDL_AcquireCameraFrame(camera_, &tsNS); + if (!surf) + return; + + callback_(static_cast(surf->pixels), surf->pitch, surf->w, + surf->h, surf->format, tsNS); + + SDL_ReleaseCameraFrame(camera_, surf); + } + +private: + SDL_Camera *camera_ = nullptr; + SDL_CameraSpec spec_{}; + int width_; + int height_; + int fps_; + SDL_PixelFormat format_; + VideoCallback 
callback_; +}; + +// ---------------- SDLMediaManager implementation ---------------- + +SDLMediaManager::SDLMediaManager() = default; + +SDLMediaManager::~SDLMediaManager() { + stopMic(); + stopCamera(); + stopSpeaker(); +} + +bool SDLMediaManager::ensureSDLInit(Uint32 flags) { + if ((SDL_WasInit(flags) & flags) == flags) { + return true; // already init + } + if (!SDL_InitSubSystem(flags)) { + std::cerr << "SDL_InitSubSystem failed (flags=" << flags + << "): " << SDL_GetError() << std::endl; + return false; + } + return true; +} + +// ---------- Mic control ---------- + +bool SDLMediaManager::startMic( + const std::shared_ptr &audio_source) { + stopMic(); + + if (!audio_source) { + std::cerr << "startMic: audioSource is null\n"; + return false; + } + + mic_source_ = audio_source; + mic_running_.store(true, std::memory_order_relaxed); + + // Try SDL path + if (!ensureSDLInit(SDL_INIT_AUDIO)) { + std::cerr << "No SDL audio, falling back to noise loop.\n"; + mic_using_sdl_ = false; + mic_thread_ = + std::thread(runNoiseCaptureLoop, mic_source_, std::ref(mic_running_)); + return true; + } + + int recCount = 0; + SDL_AudioDeviceID *recDevs = SDL_GetAudioRecordingDevices(&recCount); + if (!recDevs || recCount == 0) { + std::cerr << "No microphone devices found, falling back to noise loop.\n"; + if (recDevs) + SDL_free(recDevs); + mic_using_sdl_ = false; + mic_thread_ = + std::thread(runNoiseCaptureLoop, mic_source_, std::ref(mic_running_)); + return true; + } + SDL_free(recDevs); + + // We have at least one mic; use SDL + mic_using_sdl_ = true; + + mic_sdl_ = std::make_unique( + mic_source_->sample_rate(), mic_source_->num_channels(), + mic_source_->sample_rate() / 100, // ~10ms + [src = mic_source_](const int16_t *samples, int num_samples_per_channel, + int sample_rate, int num_channels) { + AudioFrame frame = AudioFrame::create(sample_rate, num_channels, + num_samples_per_channel); + std::memcpy(frame.data().data(), samples, + num_samples_per_channel * num_channels * sizeof(int16_t)); + try { + src->captureFrame(frame); + } catch (const std::exception &e) { + std::cerr << "Error in captureFrame (SDL mic): " << e.what() + << std::endl; + } + }); + + if (!mic_sdl_->init()) { + std::cerr << "Failed to init SDL mic, falling back to noise loop.\n"; + mic_using_sdl_ = false; + mic_sdl_.reset(); + mic_thread_ = + std::thread(runNoiseCaptureLoop, mic_source_, std::ref(mic_running_)); + return true; + } + + mic_thread_ = std::thread(&SDLMediaManager::micLoopSDL, this); + return true; +} + +void SDLMediaManager::micLoopSDL() { + while (mic_running_.load(std::memory_order_relaxed)) { + mic_sdl_->pump(); + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + } +} + +void SDLMediaManager::stopMic() { + mic_running_.store(false, std::memory_order_relaxed); + if (mic_thread_.joinable()) { + mic_thread_.join(); + } + mic_sdl_.reset(); + mic_source_.reset(); +} + +// ---------- Camera control ---------- + +bool SDLMediaManager::startCamera( + const std::shared_ptr &video_source) { + stopCamera(); + + if (!video_source) { + std::cerr << "startCamera: videoSource is null\n"; + return false; + } + + cam_source_ = video_source; + cam_running_.store(true, std::memory_order_relaxed); + + // Try SDL + if (!ensureSDLInit(SDL_INIT_CAMERA)) { + std::cerr << "No SDL camera subsystem, using fake video loop.\n"; + cam_using_sdl_ = false; + cam_thread_ = std::thread(runFakeVideoCaptureLoop, cam_source_, + std::ref(cam_running_)); + return true; + } + + int camCount = 0; + SDL_CameraID *cams = 
SDL_GetCameras(&camCount); + if (!cams || camCount == 0) { + std::cerr << "No camera devices found, using fake video loop.\n"; + if (cams) + SDL_free(cams); + cam_using_sdl_ = false; + cam_thread_ = std::thread(runFakeVideoCaptureLoop, cam_source_, + std::ref(cam_running_)); + return true; + } + SDL_free(cams); + + cam_using_sdl_ = true; + can_sdl_ = std::make_unique( + 1280, 720, 30, + SDL_PIXELFORMAT_RGBA32, // Note SDL_PIXELFORMAT_RGBA8888 is not compatable + // with Livekit RGBA format. + [src = cam_source_](const uint8_t *pixels, int pitch, int width, + int height, SDL_PixelFormat /*fmt*/, + Uint64 timestampNS) { + auto frame = LKVideoFrame::create(width, height, VideoBufferType::RGBA); + uint8_t *dst = frame.data(); + const int dstPitch = width * 4; + + for (int y = 0; y < height; ++y) { + std::memcpy(dst + y * dstPitch, pixels + y * pitch, dstPitch); + } + + try { + src->captureFrame(frame, timestampNS / 1000, + VideoRotation::VIDEO_ROTATION_0); + } catch (const std::exception &e) { + std::cerr << "Error in captureFrame (SDL cam): " << e.what() + << std::endl; + } + }); + + if (!can_sdl_->init()) { + std::cerr << "Failed to init SDL camera, using fake video loop.\n"; + cam_using_sdl_ = false; + can_sdl_.reset(); + cam_thread_ = std::thread(runFakeVideoCaptureLoop, cam_source_, + std::ref(cam_running_)); + return true; + } + + cam_thread_ = std::thread(&SDLMediaManager::cameraLoopSDL, this); + return true; +} + +void SDLMediaManager::cameraLoopSDL() { + while (cam_running_.load(std::memory_order_relaxed)) { + can_sdl_->pump(); + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + } +} + +void SDLMediaManager::stopCamera() { + cam_running_.store(false, std::memory_order_relaxed); + if (cam_thread_.joinable()) { + cam_thread_.join(); + } + can_sdl_.reset(); + cam_source_.reset(); +} + +// ---------- Speaker control (placeholder) ---------- + +bool SDLMediaManager::startSpeaker( + const std::shared_ptr &audio_stream) { + stopSpeaker(); + + if (!audio_stream) { + std::cerr << "startSpeaker: audioStream is null\n"; + return false; + } + + if (!ensureSDLInit(SDL_INIT_AUDIO)) { + std::cerr << "startSpeaker: SDL_INIT_AUDIO failed\n"; + return false; + } + + speaker_stream_ = audio_stream; + speaker_running_.store(true, std::memory_order_relaxed); + + // Note, we don't open the speaker since the format is unknown yet. + // Instead, open the speaker in the speakerLoopSDL thread with the native + // format. + try { + speaker_thread_ = std::thread(&SDLMediaManager::speakerLoopSDL, this); + } catch (const std::exception &e) { + std::cerr << "startSpeaker: failed to start speaker thread: " << e.what() + << "\n"; + speaker_running_.store(false, std::memory_order_relaxed); + speaker_stream_.reset(); + return false; + } + + return true; +} + +void SDLMediaManager::speakerLoopSDL() { + SDL_AudioStream *localStream = nullptr; + SDL_AudioDeviceID dev = 0; + + while (speaker_running_.load(std::memory_order_relaxed)) { + if (!speaker_stream_) { + break; + } + + livekit::AudioFrameEvent ev; + if (!speaker_stream_->read(ev)) { + // EOS or closed + break; + } + + const livekit::AudioFrame &frame = ev.frame; + const auto &data = frame.data(); + if (data.empty()) { + continue; + } + + // Lazily open SDL audio stream based on the first frame's format, so no + // resampler is needed. 
+ if (!localStream) { + SDL_AudioSpec want{}; + want.format = SDL_AUDIO_S16; + want.channels = static_cast(frame.num_channels()); + want.freq = frame.sample_rate(); + + localStream = + SDL_OpenAudioDeviceStream(SDL_AUDIO_DEVICE_DEFAULT_PLAYBACK, &want, + /*callback=*/nullptr, + /*userdata=*/nullptr); + + if (!localStream) { + std::cerr << "speakerLoopSDL: SDL_OpenAudioDeviceStream failed: " + << SDL_GetError() << "\n"; + break; + } + + sdl_audio_stream_ = localStream; // store if you want to inspect later + + dev = SDL_GetAudioStreamDevice(localStream); + if (dev == 0) { + std::cerr << "speakerLoopSDL: SDL_GetAudioStreamDevice failed: " + << SDL_GetError() << "\n"; + break; + } + + if (!SDL_ResumeAudioDevice(dev)) { + std::cerr << "speakerLoopSDL: SDL_ResumeAudioDevice failed: " + << SDL_GetError() << "\n"; + break; + } + } + + // Push PCM to SDL. We assume frames are already S16, interleaved, matching + // sample_rate / channels we used above. + const int numBytes = static_cast(data.size() * sizeof(std::int16_t)); + + if (!SDL_PutAudioStreamData(localStream, data.data(), numBytes)) { + std::cerr << "speakerLoopSDL: SDL_PutAudioStreamData failed: " + << SDL_GetError() << "\n"; + break; + } + + // Tiny sleep to avoid busy loop; SDL buffers internally. + SDL_Delay(2); + } + + if (localStream) { + SDL_DestroyAudioStream(localStream); + localStream = nullptr; + sdl_audio_stream_ = nullptr; + } + + speaker_running_.store(false, std::memory_order_relaxed); +} + +void SDLMediaManager::stopSpeaker() { + speaker_running_.store(false, std::memory_order_relaxed); + if (speaker_thread_.joinable()) { + speaker_thread_.join(); + } + if (sdl_audio_stream_) { + SDL_DestroyAudioStream(sdl_audio_stream_); + sdl_audio_stream_ = nullptr; + } + speaker_stream_.reset(); +} + +// ---------- Renderer control (placeholder) ---------- + +bool SDLMediaManager::initRenderer( + const std::shared_ptr &video_stream) { + if (!video_stream) { + std::cerr << "startRenderer: videoStream is null\n"; + return false; + } + // Ensure SDL video subsystem is initialized + if (!ensureSDLInit(SDL_INIT_VIDEO)) { + std::cerr << "startRenderer: SDL_INIT_VIDEO failed\n"; + return false; + } + renderer_stream_ = video_stream; + renderer_running_.store(true, std::memory_order_relaxed); + + // Lazily create the SDLVideoRenderer + if (!sdl_renderer_) { + sdl_renderer_ = std::make_unique(); + // You can tune these dimensions or even make them options + if (!sdl_renderer_->init("LiveKit Remote Video", 1280, 720)) { + std::cerr << "startRenderer: SDLVideoRenderer::init failed\n"; + sdl_renderer_.reset(); + renderer_stream_.reset(); + renderer_running_.store(false, std::memory_order_relaxed); + return false; + } + } + + // Start the SDL renderer's own render thread + sdl_renderer_->setStream(renderer_stream_); + + return true; +} + +void SDLMediaManager::shutdownRenderer() { + renderer_running_.store(false, std::memory_order_relaxed); + + // Shut down SDL renderer thread if it exists + if (sdl_renderer_) { + sdl_renderer_->shutdown(); + } + + // Old renderer_thread_ is no longer used, but if you still have it: + if (renderer_thread_.joinable()) { + renderer_thread_.join(); + } + + renderer_stream_.reset(); +} + +void SDLMediaManager::render() { + if (renderer_running_.load(std::memory_order_relaxed) && sdl_renderer_) { + sdl_renderer_->render(); + } +} \ No newline at end of file diff --git a/examples/simple_room/sdl_media_manager.h b/examples/simple_room/sdl_media_manager.h new file mode 100644 index 0000000..cd9ba46 --- /dev/null +++ 
b/examples/simple_room/sdl_media_manager.h @@ -0,0 +1,109 @@ +/* + * Copyright 2025 LiveKit, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include + +#include +#include +#include + +#include "wav_audio_source.h" + +namespace livekit { +class AudioSource; +class VideoSource; +class AudioStream; +class VideoStream; +} // namespace livekit + +// Forward-declared SDL helpers (you can also keep these separate if you like) +class SDLMicSource; +class SDLCamSource; +class SDLVideoRenderer; + +// SDLMediaManager gives you dedicated control over: +// - mic capture -> AudioSource +// - camera capture -> VideoSource +// - speaker playback -> AudioStream (TODO: integrate your API) +// - renderer -> VideoStream (TODO: integrate your API) +class SDLMediaManager { +public: + SDLMediaManager(); + ~SDLMediaManager(); + + // Mic (local capture -> AudioSource) + bool startMic(const std::shared_ptr &audio_source); + void stopMic(); + + // Camera (local capture -> VideoSource) + bool startCamera(const std::shared_ptr &video_source); + void stopCamera(); + + // Speaker (remote audio playback) + bool startSpeaker(const std::shared_ptr &audio_stream); + void stopSpeaker(); + + // Renderer (remote video rendering) + // Following APIs must be called on main thread + bool initRenderer(const std::shared_ptr &video_stream); + void shutdownRenderer(); + void render(); + +private: + // ---- SDL bootstrap helpers ---- + bool ensureSDLInit(Uint32 flags); + + // ---- Mic helpers ---- + void micLoopSDL(); + void micLoopNoise(); + + // ---- Camera helpers ---- + void cameraLoopSDL(); + void cameraLoopFake(); + + // ---- Speaker helpers (TODO: wire AudioStream -> SDL audio) ---- + void speakerLoopSDL(); + + // Mic + std::shared_ptr mic_source_; + std::unique_ptr mic_sdl_; + std::thread mic_thread_; + std::atomic mic_running_{false}; + bool mic_using_sdl_ = false; + + // Camera + std::shared_ptr cam_source_; + std::unique_ptr can_sdl_; + std::thread cam_thread_; + std::atomic cam_running_{false}; + bool cam_using_sdl_ = false; + + // Speaker (remote audio) – left mostly as a placeholder + std::shared_ptr speaker_stream_; + std::thread speaker_thread_; + std::atomic speaker_running_{false}; + SDL_AudioStream *sdl_audio_stream_ = nullptr; + + // Renderer (remote video) – left mostly as a placeholder + std::unique_ptr sdl_renderer_; + std::shared_ptr renderer_stream_; + std::thread renderer_thread_; + std::atomic renderer_running_{false}; +}; diff --git a/examples/simple_room/sdl_video_renderer.cpp b/examples/simple_room/sdl_video_renderer.cpp new file mode 100644 index 0000000..55aa799 --- /dev/null +++ b/examples/simple_room/sdl_video_renderer.cpp @@ -0,0 +1,165 @@ +/* + * Copyright 2025 LiveKit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an “AS IS” BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "sdl_video_renderer.h" + +#include "livekit/livekit.h" +#include +#include + +using namespace livekit; + +SDLVideoRenderer::SDLVideoRenderer() = default; + +SDLVideoRenderer::~SDLVideoRenderer() { shutdown(); } + +bool SDLVideoRenderer::init(const char *title, int width, int height) { + width_ = width; + height_ = height; + + // Assume SDL_Init(SDL_INIT_VIDEO) already called in main() + window_ = SDL_CreateWindow(title, width_, height_, 0); + if (!window_) { + std::cerr << "SDL_CreateWindow failed: " << SDL_GetError() << "\n"; + return false; + } + + renderer_ = SDL_CreateRenderer(window_, nullptr); + if (!renderer_) { + std::cerr << "SDL_CreateRenderer failed: " << SDL_GetError() << "\n"; + return false; + } + + // Note, web will send out BGRA as default, and we can't use ARGB since ffi + // does not support converting from BGRA to ARGB. + texture_ = SDL_CreateTexture(renderer_, SDL_PIXELFORMAT_RGBA8888, + SDL_TEXTUREACCESS_STREAMING, width_, height_); + if (!texture_) { + std::cerr << "SDL_CreateTexture failed: " << SDL_GetError() << "\n"; + return false; + } + + return true; +} + +void SDLVideoRenderer::shutdown() { + if (texture_) { + SDL_DestroyTexture(texture_); + texture_ = nullptr; + } + if (renderer_) { + SDL_DestroyRenderer(renderer_); + renderer_ = nullptr; + } + if (window_) { + SDL_DestroyWindow(window_); + window_ = nullptr; + } + + stream_.reset(); +} + +void SDLVideoRenderer::setStream(std::shared_ptr stream) { + stream_ = std::move(stream); +} + +void SDLVideoRenderer::render() { + // 0) Basic sanity + if (!window_ || !renderer_) { + return; + } + + // 1) Pump SDL events on the main thread + SDL_Event e; + while (SDL_PollEvent(&e)) { + if (e.type == SDL_EVENT_QUIT) { + // TODO: set some global or member flag if you want to quit the app + } + } + + // 2) If no stream, nothing to render + if (!stream_) { + return; + } + + // 3) Read a frame from VideoStream (blocking until one is available) + livekit::VideoFrameEvent vfe; + bool gotFrame = stream_->read(vfe); + if (!gotFrame) { + // EOS / closed – nothing more to render + return; + } + + livekit::LKVideoFrame &frame = vfe.frame; + + // 4) Ensure the frame is RGBA. + // Ideally you requested RGBA from VideoStream::Options so this is a no-op. + if (frame.type() != livekit::VideoBufferType::RGBA) { + try { + frame = frame.convert(livekit::VideoBufferType::RGBA, false); + } catch (const std::exception &ex) { + std::cerr << "SDLVideoRenderer: convert to RGBA failed: " << ex.what() + << "\n"; + return; + } + } + + // Handle size change: recreate texture if needed + if (frame.width() != width_ || frame.height() != height_) { + width_ = frame.width(); + height_ = frame.height(); + + if (texture_) { + SDL_DestroyTexture(texture_); + texture_ = nullptr; + } + texture_ = SDL_CreateTexture( + renderer_, + SDL_PIXELFORMAT_RGBA32, // Note, SDL_PIXELFORMAT_RGBA8888 is not + // compatible with Livekit RGBA format. 
+ SDL_TEXTUREACCESS_STREAMING, width_, height_); + if (!texture_) { + std::cerr << "SDLVideoRenderer: SDL_CreateTexture failed: " + << SDL_GetError() << "\n"; + return; + } + } + + // 6) Upload RGBA data to SDL texture + void *pixels = nullptr; + int pitch = 0; + if (!SDL_LockTexture(texture_, nullptr, &pixels, &pitch)) { + std::cerr << "SDLVideoRenderer: SDL_LockTexture failed: " << SDL_GetError() + << "\n"; + return; + } + + const std::uint8_t *src = frame.data(); + const int srcPitch = frame.width() * 4; // RGBA: 4 bytes per pixel + + for (int y = 0; y < frame.height(); ++y) { + std::memcpy(static_cast(pixels) + y * pitch, + src + y * srcPitch, srcPitch); + } + + SDL_UnlockTexture(texture_); + + // 7) Present + SDL_SetRenderDrawColor(renderer_, 0, 0, 0, 255); + SDL_RenderClear(renderer_); + SDL_RenderTexture(renderer_, texture_, nullptr, nullptr); + SDL_RenderPresent(renderer_); +} diff --git a/examples/simple_room/sdl_video_renderer.h b/examples/simple_room/sdl_video_renderer.h new file mode 100644 index 0000000..6e666ea --- /dev/null +++ b/examples/simple_room/sdl_video_renderer.h @@ -0,0 +1,52 @@ +/* + * Copyright 2025 LiveKit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an “AS IS” BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include + +namespace livekit { +class VideoStream; +} + +class SDLVideoRenderer { +public: + SDLVideoRenderer(); + ~SDLVideoRenderer(); + + // Must be called on main thread, after SDL_Init(SDL_INIT_VIDEO). + bool init(const char *title, int width, int height); + + // Set/replace the stream to render. Safe to call from main thread. + void setStream(std::shared_ptr stream); + + // Called on main thread each tick to pump events and draw latest frame. + void render(); + + void shutdown(); // destroy window/renderer/texture + +private: + SDL_Window *window_ = nullptr; + SDL_Renderer *renderer_ = nullptr; + SDL_Texture *texture_ = nullptr; + + std::shared_ptr stream_; + int width_ = 0; + int height_ = 0; +}; diff --git a/include/livekit/audio_frame.h b/include/livekit/audio_frame.h index ae43351..529d658 100644 --- a/include/livekit/audio_frame.h +++ b/include/livekit/audio_frame.h @@ -42,6 +42,7 @@ class AudioFrame { */ AudioFrame(std::vector data, int sample_rate, int num_channels, int samples_per_channel); + AudioFrame(); // Default constructor /** * Create a new zero-initialized AudioFrame instance. diff --git a/include/livekit/audio_stream.h b/include/livekit/audio_stream.h index 84aac8a..a1b043c 100644 --- a/include/livekit/audio_stream.h +++ b/include/livekit/audio_stream.h @@ -39,24 +39,48 @@ struct AudioFrameEvent { AudioFrame frame; }; +// Represents a pull-based stream of decoded PCM audio frames coming from +// a remote (or local) LiveKit track. Similar to VideoStream, but for audio. 
+// +// Typical usage: +// +// AudioStream::Options opts; +// auto stream = AudioStream::fromTrack(remoteAudioTrack, opts); +// +// AudioFrameEvent ev; +// while (stream->read(ev)) { +// // ev.frame contains interleaved int16 PCM samples +// } +// +// stream->close(); // optional, called automatically in destructor class AudioStream { public: + // Configuration options for AudioStream creation. struct Options { - std::size_t capacity{0}; // 0 = unbounded - int sample_rate{48000}; - int num_channels{1}; - std::string noise_cancellation_module; // empty = disabled - std::string noise_cancellation_options_json; // empty = no options + // Maximum number of AudioFrameEvent items buffered in the internal queue. + // 0 means "unbounded" (the queue can grow without limit). + // + // Using a small non-zero capacity gives ring-buffer semantics: + // if the queue is full, the oldest frame is dropped when a new one arrives. + std::size_t capacity{0}; + + // Optional: name of a noise cancellation module to enable for this stream. + // Empty string means "no noise cancellation". + std::string noise_cancellation_module; + + // Optional: JSON-encoded configuration for the noise cancellation module. + // Empty string means "use module defaults". + std::string noise_cancellation_options_json; }; // Factory: create an AudioStream bound to a specific Track - static std::unique_ptr - from_track(const std::shared_ptr &track, const Options &options); + static std::shared_ptr + fromTrack(const std::shared_ptr &track, const Options &options); // Factory: create an AudioStream from a Participant + TrackSource - static std::unique_ptr from_participant(Participant &participant, - TrackSource track_source, - const Options &options); + static std::shared_ptr fromParticipant(Participant &participant, + TrackSource track_source, + const Options &options); ~AudioStream(); @@ -65,28 +89,35 @@ class AudioStream { AudioStream(AudioStream &&) noexcept; AudioStream &operator=(AudioStream &&) noexcept; - /// Blocking read: returns true if a frame was delivered, - /// false if the stream has ended (EOS or closed). + /// Blocking read: waits until there is an AudioFrameEvent available in the + /// internal queue, or the stream reaches EOS / is closed. + /// + /// \param out_event On success, filled with the next audio frame. + /// \return true if a frame was delivered; false if the stream ended + /// (end-of-stream or close()) and no more data is available. bool read(AudioFrameEvent &out_event); - /// Signal that we are no longer interested in frames. - /// Disposes the underlying FFI stream and removes the listener. + /// Signal that we are no longer interested in audio frames. + /// + /// This disposes the underlying FFI audio stream, unregisters the listener + /// from FfiClient, marks the stream as closed, and wakes any blocking read(). + /// After calling close(), further calls to read() will return false. 
void close(); private: AudioStream() = default; - void init_from_track(const std::shared_ptr &track, - const Options &options); - void init_from_participant(Participant &participant, TrackSource track_source, - const Options &options); + void initFromTrack(const std::shared_ptr &track, + const Options &options); + void initFromParticipant(Participant &participant, TrackSource track_source, + const Options &options); // FFI event handler (registered with FfiClient) - void on_ffi_event(const proto::FfiEvent &event); + void onFfiEvent(const proto::FfiEvent &event); // Queue helpers - void push_frame(AudioFrameEvent &&ev); - void push_eos(); + void pushFrame(AudioFrameEvent &&ev); + void pushEos(); mutable std::mutex mutex_; std::condition_variable cv_; diff --git a/include/livekit/ffi_client.h b/include/livekit/ffi_client.h index 602f9c8..51cd226 100644 --- a/include/livekit/ffi_client.h +++ b/include/livekit/ffi_client.h @@ -38,6 +38,7 @@ class OwnedTrackPublication; class TranscriptionSegment; } // namespace proto +struct RoomOptions; struct TrackPublishOptions; using FfiCallbackFn = void (*)(const uint8_t *, size_t); @@ -76,7 +77,8 @@ class FfiClient { // Room APIs std::future connectAsync(const std::string &url, - const std::string &token); + const std::string &token, + const RoomOptions &options); // Track APIs std::future> getTrackStatsAsync(uintptr_t track_handle); diff --git a/include/livekit/livekit.h b/include/livekit/livekit.h index 4b5fc30..3eab63d 100644 --- a/include/livekit/livekit.h +++ b/include/livekit/livekit.h @@ -16,14 +16,17 @@ #include "audio_frame.h" #include "audio_source.h" +#include "audio_stream.h" #include "local_audio_track.h" #include "local_participant.h" #include "local_track_publication.h" #include "local_video_track.h" #include "participant.h" #include "remote_participant.h" +#include "remote_track_publication.h" #include "room.h" #include "room_delegate.h" #include "track_publication.h" #include "video_frame.h" -#include "video_source.h" \ No newline at end of file +#include "video_source.h" +#include "video_stream.h" \ No newline at end of file diff --git a/include/livekit/local_participant.h b/include/livekit/local_participant.h index 65dcb16..1077d73 100644 --- a/include/livekit/local_participant.h +++ b/include/livekit/local_participant.h @@ -38,8 +38,7 @@ struct Transcription; /** * Represents the local participant in a room. * - * C++ analogue of the Python LocalParticipant, built on top of the C++ - * Participant base class. + * LocalParticipant, built on top of the participant.h base class. */ class LocalParticipant : public Participant { public: diff --git a/include/livekit/participant.h b/include/livekit/participant.h index a0003fe..0cc7418 100644 --- a/include/livekit/participant.h +++ b/include/livekit/participant.h @@ -31,7 +31,6 @@ enum class ParticipantKind { Standard = 0, Ingress, Egress, Sip, Agent }; class Participant { public: - // TODO, consider holding a weak ptr of FfiHandle if it is useful. 
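The AudioStream API above follows a simple pull model. A minimal consumer sketch; drainAudioStream is an illustrative name, and frames are assumed to carry interleaved S16 PCM as the speaker loop expects:

#include <iostream>
#include <memory>

#include "livekit/livekit.h"

void drainAudioStream(const std::shared_ptr<livekit::AudioStream> &stream) {
  livekit::AudioFrameEvent ev;
  // read() blocks until a frame arrives and returns false on EOS or close().
  while (stream->read(ev)) {
    const livekit::AudioFrame &frame = ev.frame;
    std::cout << frame.sample_rate() << " Hz, " << frame.num_channels()
              << " ch, " << frame.data().size() << " samples (all channels)\n";
  }
}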
Participant(FfiHandle handle, std::string sid, std::string name, std::string identity, std::string metadata, std::unordered_map attributes, diff --git a/include/livekit/remote_audio_track.h b/include/livekit/remote_audio_track.h index 97ca2df..99e4c8e 100644 --- a/include/livekit/remote_audio_track.h +++ b/include/livekit/remote_audio_track.h @@ -33,11 +33,7 @@ class AudioSource; // ============================================================ class RemoteAudioTrack : public Track { public: - explicit RemoteAudioTrack(FfiHandle handle, const proto::OwnedTrack &track); - - static std::shared_ptr - createRemoteAudioTrack(const std::string &name, - const std::shared_ptr &source); + explicit RemoteAudioTrack(const proto::OwnedTrack &track); std::string to_string() const; diff --git a/include/livekit/remote_video_track.h b/include/livekit/remote_video_track.h index cbed139..c6abfce 100644 --- a/include/livekit/remote_video_track.h +++ b/include/livekit/remote_video_track.h @@ -33,11 +33,7 @@ class VideoSource; // ============================================================ class RemoteVideoTrack : public Track { public: - explicit RemoteVideoTrack(FfiHandle handle, const proto::OwnedTrack &track); - - static std::shared_ptr - createRemoteVideoTrack(const std::string &name, - const std::shared_ptr &source); + explicit RemoteVideoTrack(const proto::OwnedTrack &track); std::string to_string() const; diff --git a/include/livekit/room.h b/include/livekit/room.h index 98077ed..f2e21d4 100644 --- a/include/livekit/room.h +++ b/include/livekit/room.h @@ -34,16 +34,146 @@ class FfiEvent; class LocalParticipant; class RemoteParticipant; +// Represents end-to-end encryption (E2EE) settings. +// Mirrors python sdk: `E2EEOptions` +struct E2EEOptions { + // Encryption algorithm type. + int encryption_type = 0; + + // Shared static key. If provided, this key is used for encryption. + std::string shared_key; + + // Salt used when deriving ratcheted encryption keys. + std::string ratchet_salt; + + // How many consecutive ratcheting failures are tolerated before an error. + int failure_tolerance = 0; + + // Maximum size of the ratchet window. + int ratchet_window_size = 0; +}; + +// Represents a single ICE server configuration. +// Mirrors python: RtcConfiguration.ice_servers[*] +struct IceServer { + // TURN/STUN server URL (e.g. "stun:stun.l.google.com:19302"). + std::string url; + + // Optional username for TURN authentication. + std::string username; + + // Optional credential (password) for TURN authentication. + std::string credential; +}; + +// WebRTC configuration (ICE, transport, etc.). +// Mirrors python: `RtcConfiguration` +struct RtcConfig { + // ICE transport type (e.g., ALL, RELAY). Maps to proto::IceTransportType. + int ice_transport_type = 0; + + // Continuous or single ICE gathering. Maps to + // proto::ContinualGatheringPolicy. + int continual_gathering_policy = 0; + + // List of STUN/TURN servers for ICE candidate generation. + std::vector ice_servers; +}; + +// Top-level room connection options. +// Mirrors python: `RoomOptions` +struct RoomOptions { + // If true (default), automatically subscribe to all remote tracks. + // This is CRITICAL. Without auto_subscribe, you will never receive: + // - `track_subscribed` events + // - remote audio/video frames + bool auto_subscribe = true; + + // Enable dynacast (server sends optimal layers depending on subscribers). + bool dynacast = false; + + // Optional end-to-end encryption settings. 
+ std::optional e2ee; + + // Optional WebRTC configuration (ICE policy, servers, etc.) + std::optional rtc_config; +}; + +// Represents a LiveKit room session. +// A Room manages: +// - the connection to the LiveKit server +// - participant list (local + remote) +// - track publications +// - server events forwarded to a RoomDelegate class Room { public: Room(); ~Room(); + + // Assign a RoomDelegate that receives room lifecycle callbacks. + // + // The delegate must remain valid for the lifetime of the Room or until a + // different delegate is assigned. The Room does not take ownership. + // Typical usage: + // class MyDelegate : public RoomDelegate { ... }; + // MyDelegate del; + // Room room; + // room.setDelegate(&del); void setDelegate(RoomDelegate *delegate); - bool Connect(const std::string &url, const std::string &token); + + // Connect to a LiveKit room using the given URL and token, applying the + // supplied connection options. + // + // Parameters: + // url — WebSocket URL of the LiveKit server. + // token — Access token for authentication. + // options — Connection options controlling auto-subscribe, + // dynacast, E2EE, and WebRTC configuration. + // Behavior: + // - Registers an FFI event listener *before* sending the connect request. + // - Sends a proto::FfiRequest::Connect with the URL, token, + // and the provided RoomOptions. + // - Blocks until the FFI connect response arrives. + // - Initializes local participant and remote participants. + // - Emits room/participant/track events to the delegate. + // IMPORTANT: + // RoomOptions defaults auto_subscribe = true. + // Without auto_subscribe enabled, remote tracks will NOT be subscribed + // automatically, and no remote audio/video will ever arrive. + bool Connect(const std::string &url, const std::string &token, + const RoomOptions &options); // Accessors + + // Retrieve static metadata about the room. + // This contains fields such as: + // - SID + // - room name + // - metadata + // - participant counts + // - creation timestamp RoomInfoData room_info() const; + + // Get the local participant. + // + // This object represents the current user, including: + // - published tracks (audio/video/screen) + // - identity, SID, metadata + // - publishing/unpublishing operations + // Return value: + // Non-null pointer after successful Connect(). LocalParticipant *local_participant() const; + + // Look up a remote participant by identity. + // + // Parameters: + // identity — The participant’s identity string (not SID) + // Return value: + // Pointer to RemoteParticipant if present, otherwise nullptr. 
+ // RemoteParticipant contains: + // - identity/name/metadata + // - track publications + // - callbacks for track subscribed/unsubscribed, muted/unmuted RemoteParticipant *remote_participant(const std::string &identity) const; private: @@ -53,7 +183,7 @@ class Room { RoomInfoData room_info_; std::shared_ptr room_handle_; std::unique_ptr local_participant_; - std::unordered_map> + std::unordered_map> remote_participants_; void OnEvent(const proto::FfiEvent &event); diff --git a/include/livekit/room_delegate.h b/include/livekit/room_delegate.h index 72411fc..9073654 100644 --- a/include/livekit/room_delegate.h +++ b/include/livekit/room_delegate.h @@ -27,6 +27,9 @@ namespace livekit { class Room; enum class VideoCodec; enum class TrackSource; +class Track; +class RemoteTrackPublication; +class RemoteParticipant; enum class ConnectionQuality { Poor, @@ -237,11 +240,9 @@ struct TrackUnpublishedEvent { }; struct TrackSubscribedEvent { - std::string participant_identity; - std::string track_sid; - std::string track_name; - std::string track_kind; // or enum - std::string track_source; // or enum + std::shared_ptr track; + std::shared_ptr publication; + RemoteParticipant *participant = nullptr; }; struct TrackUnsubscribedEvent { diff --git a/include/livekit/stats.h b/include/livekit/stats.h index c9aa1f6..9a0c51c 100644 --- a/include/livekit/stats.h +++ b/include/livekit/stats.h @@ -49,10 +49,6 @@ class CertificateStats; class StreamStats; } // namespace proto -// ---------------------- -// SDK enums (decoupled from proto enums) -// ---------------------- - enum class DataChannelState { Connecting, Open, diff --git a/include/livekit/track.h b/include/livekit/track.h index 2c5f9a9..2487d39 100644 --- a/include/livekit/track.h +++ b/include/livekit/track.h @@ -25,9 +25,10 @@ #include #include +#include + namespace livekit { -// ----- Enums from track.proto ----- enum class TrackKind { KIND_UNKNOWN = 0, KIND_AUDIO = 1, diff --git a/include/livekit/track_publication.h b/include/livekit/track_publication.h index 8503188..19b8055 100644 --- a/include/livekit/track_publication.h +++ b/include/livekit/track_publication.h @@ -38,7 +38,7 @@ class LocalTrack; class RemoteTrack; /** - * C++ analogue of Python TrackPublication. + * C++ TrackPublication. * * Wraps the immutable publication info plus an FFI handle, and * holds a weak reference to the associated Track (if any). @@ -72,8 +72,9 @@ class TrackPublication { uintptr_t ffiHandleId() const noexcept { return handle_.get(); } /// Associated Track (if attached). 
- std::shared_ptr track() const noexcept { return track_.lock(); } + std::shared_ptr track() const noexcept { return track_; } void setTrack(const std::shared_ptr &track) noexcept { + std::cout << "track_ is null " << (track_.get() == nullptr) << std::endl; track_ = track; } @@ -86,7 +87,7 @@ class TrackPublication { std::vector audio_features); FfiHandle handle_; - std::weak_ptr track_; + std::shared_ptr track_; std::string sid_; std::string name_; diff --git a/include/livekit/video_frame.h b/include/livekit/video_frame.h index a43dc21..878aaca 100644 --- a/include/livekit/video_frame.h +++ b/include/livekit/video_frame.h @@ -25,9 +25,9 @@ namespace livekit { // Mirror of WebRTC video buffer type enum class VideoBufferType { - ARGB, + RGBA = 0, ABGR, - RGBA, + ARGB, BGRA, RGB24, I420, @@ -58,7 +58,7 @@ class OwnedVideoBuffer; */ class LKVideoFrame { public: - LKVideoFrame() = delete; + LKVideoFrame(); LKVideoFrame(int width, int height, VideoBufferType type, std::vector data); diff --git a/include/livekit/video_stream.h b/include/livekit/video_stream.h index 322ab1d..fea3a93 100644 --- a/include/livekit/video_stream.h +++ b/include/livekit/video_stream.h @@ -31,7 +31,7 @@ namespace livekit { -// C++ equivalent of Python VideoFrameEvent +// A single video frame event delivered by VideoStream::read(). struct VideoFrameEvent { LKVideoFrame frame; std::int64_t timestamp_us; @@ -42,19 +42,42 @@ namespace proto { class FfiEvent; } +// Represents a pull-based stream of decoded video frames coming from +// a remote (or local) LiveKit track. Similar to AudioStream, but for video. +// +// Typical usage: +// +// VideoStream::Options opts; +// auto stream = VideoStream::fromTrack(remoteVideoTrack, opts); +// +// VideoFrameEvent ev; +// while (stream->read(ev)) { +// // ev.frame holds a decoded frame in the requested format (RGBA by default) +// } +// +// stream->close(); // optional, called automatically in destructor +// class VideoStream { public: struct Options { - std::size_t capacity{0}; // 0 = unbounded - VideoBufferType format; + // Maximum number of VideoFrameEvent items buffered in the internal queue. + // 0 means "unbounded" (the queue can grow without limit). + // + // With a non-zero capacity, the queue behaves like a ring-buffer: if it + // is full, the oldest frame is dropped when a new one arrives. + std::size_t capacity{0}; + + // Preferred pixel format for frames delivered by read(). The FFI layer + // converts into this format if supported (e.g., RGBA, BGRA, I420, ...). + VideoBufferType format{VideoBufferType::RGBA}; }; // Factory: create a VideoStream bound to a specific Track - static std::unique_ptr + static std::shared_ptr fromTrack(const std::shared_ptr &track, const Options &options); // Factory: create a VideoStream from a Participant + TrackSource - static std::unique_ptr fromParticipant(Participant &participant, + static std::shared_ptr fromParticipant(Participant &participant, TrackSource track_source, const Options &options); @@ -65,12 +88,19 @@ class VideoStream { VideoStream(VideoStream &&) noexcept; VideoStream &operator=(VideoStream &&) noexcept; - /// Blocking read: returns true if a frame was delivered, - /// false if the stream has ended (EOS or closed). + /// Blocking read: waits until a VideoFrameEvent is available in the internal + /// queue, or the stream reaches EOS / is closed. + /// + /// \param out On success, filled with the next video frame event.
+ /// \return true if a frame was delivered; false if the stream ended + /// (end-of-stream or close()) and no more data is available. bool read(VideoFrameEvent &out); - /// Signal that we are no longer interested in frames. - /// Disposes the underlying FFI stream and drains internal listener. + /// Signal that we are no longer interested in video frames. + /// + /// This disposes the underlying FFI video stream, unregisters the listener + /// from FfiClient, marks the stream as closed, and wakes any blocking read(). + /// After calling close(), further calls to read() will return false. void close(); private: diff --git a/src/audio_frame.cpp b/src/audio_frame.cpp index 0df9f76..23ff01e 100644 --- a/src/audio_frame.cpp +++ b/src/audio_frame.cpp @@ -28,6 +28,9 @@ namespace livekit { +AudioFrame::AudioFrame() + : sample_rate_(0), num_channels_(0), samples_per_channel_(0) {} + AudioFrame::AudioFrame(std::vector data, int sample_rate, int num_channels, int samples_per_channel) : data_(std::move(data)), sample_rate_(sample_rate), diff --git a/src/audio_stream.cpp b/src/audio_stream.cpp index 975908f..908c6b3 100644 --- a/src/audio_stream.cpp +++ b/src/audio_stream.cpp @@ -32,20 +32,19 @@ using proto::FfiRequest; // Factory helpers // ------------------------ -std::unique_ptr -AudioStream::from_track(const std::shared_ptr &track, - const Options &options) { - auto stream = std::unique_ptr(new AudioStream()); - stream->init_from_track(track, options); +std::shared_ptr +AudioStream::fromTrack(const std::shared_ptr &track, + const Options &options) { + auto stream = std::shared_ptr(new AudioStream()); + stream->initFromTrack(track, options); return stream; } -std::unique_ptr -AudioStream::from_participant(Participant &participant, - TrackSource track_source, - const Options &options) { - auto stream = std::unique_ptr(new AudioStream()); - stream->init_from_participant(participant, track_source, options); +std::shared_ptr +AudioStream::fromParticipant(Participant &participant, TrackSource track_source, + const Options &options) { + auto stream = std::shared_ptr(new AudioStream()); + stream->initFromParticipant(participant, track_source, options); return stream; } @@ -135,22 +134,24 @@ void AudioStream::close() { // Internal functions -void AudioStream::init_from_track(const std::shared_ptr &track, - const Options &options) { +void AudioStream::initFromTrack(const std::shared_ptr &track, + const Options &options) { capacity_ = options.capacity; options_ = options; // 1) Subscribe to FFI events listener_id_ = FfiClient::instance().AddListener( - [this](const FfiEvent &e) { this->on_ffi_event(e); }); + [this](const FfiEvent &e) { this->onFfiEvent(e); }); // 2) Send FfiRequest to create a new audio stream bound to this track FfiRequest req; auto *new_audio_stream = req.mutable_new_audio_stream(); new_audio_stream->set_track_handle( static_cast(track->ffi_handle_id())); - new_audio_stream->set_sample_rate(options_.sample_rate); - new_audio_stream->set_num_channels(options.num_channels); + // TODO, sample_rate and num_channels are not useful in AudioStream, remove it + // from FFI. 
+ // new_audio_stream->set_sample_rate(options_.sample_rate); + // new_audio_stream->set_num_channels(options.num_channels); new_audio_stream->set_type(proto::AudioStreamType::AUDIO_STREAM_NATIVE); if (!options_.noise_cancellation_module.empty()) { @@ -167,22 +168,24 @@ void AudioStream::init_from_track(const std::shared_ptr &track, stream_handle_ = FfiHandle(static_cast(stream.handle().id())); } -void AudioStream::init_from_participant(Participant &participant, - TrackSource track_source, - const Options &options) { +void AudioStream::initFromParticipant(Participant &participant, + TrackSource track_source, + const Options &options) { capacity_ = options.capacity; options_ = options; // 1) Subscribe to FFI events listener_id_ = FfiClient::instance().AddListener( - [this](const FfiEvent &e) { this->on_ffi_event(e); }); + [this](const FfiEvent &e) { this->onFfiEvent(e); }); // 2) Send FfiRequest to create audio stream from participant + track source FfiRequest req; auto *as = req.mutable_audio_stream_from_participant(); as->set_participant_handle(participant.ffiHandleId()); - as->set_sample_rate(options_.sample_rate); - as->set_num_channels(options_.num_channels); + // TODO, sample_rate and num_channels are not useful in AudioStream, remove it + // from FFI. + // as->set_sample_rate(options_.sample_rate); + // as->set_num_channels(options_.num_channels); as->set_type(proto::AudioStreamType::AUDIO_STREAM_NATIVE); as->set_track_source(static_cast(track_source)); @@ -198,11 +201,10 @@ void AudioStream::init_from_participant(Participant &participant, stream_handle_ = FfiHandle(static_cast(stream.handle().id())); } -void AudioStream::on_ffi_event(const FfiEvent &event) { +void AudioStream::onFfiEvent(const FfiEvent &event) { if (event.message_case() != FfiEvent::kAudioStreamEvent) { return; } - const auto &ase = event.audio_stream_event(); // Check if this event is for our stream handle. if (ase.stream_handle() != static_cast(stream_handle_.get())) { @@ -216,13 +218,13 @@ void AudioStream::on_ffi_event(const FfiEvent &event) { // AudioFrame._from_owned_info. AudioFrame frame = AudioFrame::fromOwnedInfo(fr.frame()); AudioFrameEvent ev{std::move(frame)}; - push_frame(std::move(ev)); + pushFrame(std::move(ev)); } else if (ase.has_eos()) { - push_eos(); + pushEos(); } } -void AudioStream::push_frame(AudioFrameEvent &&ev) { +void AudioStream::pushFrame(AudioFrameEvent &&ev) { { std::lock_guard lock(mutex_); @@ -240,7 +242,7 @@ void AudioStream::push_frame(AudioFrameEvent &&ev) { cv_.notify_one(); } -void AudioStream::push_eos() { +void AudioStream::pushEos() { { std::lock_guard lock(mutex_); if (eof_) { diff --git a/src/ffi_client.cpp b/src/ffi_client.cpp index 0fe4d3b..5b426d0 100644 --- a/src/ffi_client.cpp +++ b/src/ffi_client.cpp @@ -17,11 +17,14 @@ #include #include "build.h" +#include "e2ee.pb.h" #include "ffi.pb.h" #include "livekit/ffi_client.h" #include "livekit/ffi_handle.h" +#include "livekit/room.h" // TODO, maybe avoid circular deps by moving RoomOptions to a room_types.h ? 
#include "livekit/track.h" #include "livekit_ffi.h" +#include "room.pb.h" #include "room_proto_converter.h" namespace livekit { @@ -133,14 +136,64 @@ std::future FfiClient::registerAsync( // Room APIs Implementation std::future -FfiClient::connectAsync(const std::string &url, const std::string &token) { +FfiClient::connectAsync(const std::string &url, const std::string &token, + const RoomOptions &options) { proto::FfiRequest req; auto *connect = req.mutable_connect(); connect->set_url(url); connect->set_token(token); - connect->mutable_options()->set_auto_subscribe(true); + auto *opts = connect->mutable_options(); + opts->set_auto_subscribe(options.auto_subscribe); + opts->set_dynacast(options.dynacast); + std::cout << "connectAsync " << std::endl; + // --- E2EE / encryption (optional) --- + if (options.e2ee.has_value()) { + std::cout << "connectAsync e2ee " << std::endl; + const E2EEOptions &eo = *options.e2ee; + + // Use the non-deprecated encryption field + auto *enc = opts->mutable_encryption(); + + enc->set_encryption_type( + static_cast(eo.encryption_type)); + + auto *kp = enc->mutable_key_provider_options(); + kp->set_shared_key(eo.shared_key); + kp->set_ratchet_salt(eo.ratchet_salt); + kp->set_failure_tolerance(eo.failure_tolerance); + kp->set_ratchet_window_size(eo.ratchet_window_size); + } + + // --- RTC configuration (optional) --- + if (options.rtc_config.has_value()) { + std::cout << "options.rtc_config.has_value() " << std::endl; + const RtcConfig &rc = *options.rtc_config; + auto *rtc = opts->mutable_rtc_config(); + + rtc->set_ice_transport_type( + static_cast(rc.ice_transport_type)); + rtc->set_continual_gathering_policy( + static_cast( + rc.continual_gathering_policy)); + for (const IceServer &ice : rc.ice_servers) { + auto *s = rtc->add_ice_servers(); + + // proto: repeated string urls = 1 + if (!ice.url.empty()) { + s->add_urls(ice.url); + } + if (!ice.username.empty()) { + s->set_username(ice.username); + } + if (!ice.credential.empty()) { + // proto: password = 3 + s->set_password(ice.credential); + } + } + } + std::cout << "connectAsync sendRequest " << std::endl; proto::FfiResponse resp = sendRequest(req); if (!resp.has_connect()) { throw std::runtime_error("FfiResponse missing connect"); diff --git a/src/remote_audio_track.cpp b/src/remote_audio_track.cpp index 4384920..f8e4b81 100644 --- a/src/remote_audio_track.cpp +++ b/src/remote_audio_track.cpp @@ -24,26 +24,13 @@ namespace livekit { -RemoteAudioTrack::RemoteAudioTrack(FfiHandle handle, - const proto::OwnedTrack &track) - : Track(std::move(handle), track.info().sid(), track.info().name(), +RemoteAudioTrack::RemoteAudioTrack(const proto::OwnedTrack &track) + : Track(FfiHandle{static_cast(track.handle().id())}, + track.info().sid(), track.info().name(), fromProto(track.info().kind()), fromProto(track.info().stream_state()), track.info().muted(), true) {} -std::shared_ptr RemoteAudioTrack::createRemoteAudioTrack( - const std::string &name, const std::shared_ptr &source) { - proto::FfiRequest req; - auto *msg = req.mutable_create_audio_track(); - msg->set_name(name); - msg->set_source_handle(static_cast(source->ffi_handle_id())); - - proto::FfiResponse resp = FfiClient::instance().sendRequest(req); - const proto::OwnedTrack &owned = resp.create_audio_track().track(); - FfiHandle handle(static_cast(owned.handle().id())); - return std::make_shared(std::move(handle), owned); -} - std::string RemoteAudioTrack::to_string() const { return "rtc.RemoteAudioTrack(sid=" + sid() + ", name=" + name() + ")"; } diff --git 
a/src/remote_video_track.cpp b/src/remote_video_track.cpp index 02b5d77..3fd9ee0 100644 --- a/src/remote_video_track.cpp +++ b/src/remote_video_track.cpp @@ -24,26 +24,13 @@ namespace livekit { -RemoteVideoTrack::RemoteVideoTrack(FfiHandle handle, - const proto::OwnedTrack &track) - : Track(std::move(handle), track.info().sid(), track.info().name(), +RemoteVideoTrack::RemoteVideoTrack(const proto::OwnedTrack &track) + : Track(FfiHandle{static_cast(track.handle().id())}, + track.info().sid(), track.info().name(), fromProto(track.info().kind()), fromProto(track.info().stream_state()), track.info().muted(), true) {} -std::shared_ptr RemoteVideoTrack::createRemoteVideoTrack( - const std::string &name, const std::shared_ptr &source) { - proto::FfiRequest req; - auto *msg = req.mutable_create_video_track(); - msg->set_name(name); - msg->set_source_handle(static_cast(source->ffi_handle_id())); - - proto::FfiResponse resp = FfiClient::instance().sendRequest(req); - const proto::OwnedTrack &owned = resp.create_video_track().track(); - FfiHandle handle(static_cast(owned.handle().id())); - return std::make_shared(std::move(handle), owned); -} - std::string RemoteVideoTrack::to_string() const { return "rtc.RemoteVideoTrack(sid=" + sid() + ", name=" + name() + ")"; } diff --git a/src/room.cpp b/src/room.cpp index 9f56cdb..f52edb1 100644 --- a/src/room.cpp +++ b/src/room.cpp @@ -16,16 +16,21 @@ #include "livekit/room.h" +#include "livekit/audio_stream.h" #include "livekit/ffi_client.h" #include "livekit/local_participant.h" #include "livekit/local_track_publication.h" +#include "livekit/remote_audio_track.h" #include "livekit/remote_participant.h" #include "livekit/remote_track_publication.h" +#include "livekit/remote_video_track.h" #include "livekit/room_delegate.h" +#include "livekit/video_stream.h" #include "ffi.pb.h" #include "room.pb.h" #include "room_proto_converter.h" +#include "track.pb.h" #include "track_proto_converter.h" #include #include @@ -37,7 +42,6 @@ using proto::ConnectRequest; using proto::FfiEvent; using proto::FfiRequest; using proto::FfiResponse; -using proto::RoomOptions; namespace { @@ -67,7 +71,8 @@ void Room::setDelegate(RoomDelegate *delegate) { delegate_ = delegate; } -bool Room::Connect(const std::string &url, const std::string &token) { +bool Room::Connect(const std::string &url, const std::string &token, + const RoomOptions &options) { auto listenerId = FfiClient::instance().AddListener( std::bind(&Room::OnEvent, this, std::placeholders::_1)); { @@ -77,7 +82,7 @@ bool Room::Connect(const std::string &url, const std::string &token) { throw std::runtime_error("already connected"); } } - auto fut = FfiClient::instance().connectAsync(url, token); + auto fut = FfiClient::instance().connectAsync(url, token, options); try { auto connectCb = fut.get(); // fut will throw if it fails to connect to the room @@ -261,8 +266,61 @@ void Room::OnEvent(const FfiEvent &event) { break; } case proto::RoomEvent::kTrackSubscribed: { - auto ev = fromProto(re.track_subscribed()); + const auto &ts = re.track_subscribed(); + const std::string &identity = ts.participant_identity(); + const auto &owned_track = ts.track(); + const auto &track_info = owned_track.info(); + std::shared_ptr rpublication; + RemoteParticipant *rparticipant = nullptr; + std::shared_ptr remote_track; + { + std::lock_guard guard(lock_); + // Find participant + auto pit = remote_participants_.find(identity); + if (pit == remote_participants_.end()) { + std::cerr << "track_subscribed for unknown participant: " << identity + << 
"\n"; + break; + } + rparticipant = pit->second.get(); + // Find existing publication by track SID (from track_published) + auto &pubs = rparticipant->mutable_track_publications(); + auto pubIt = pubs.find(track_info.sid()); + if (pubIt == pubs.end()) { + std::cerr << "track_subscribed for unknown publication sid " + << track_info.sid() << " (participant " << identity + << ")\n"; + break; + } + rpublication = pubIt->second; + + // Create RemoteVideoTrack / RemoteAudioTrack + if (track_info.kind() == proto::TrackKind::KIND_VIDEO) { + remote_track = std::make_shared(owned_track); + } else if (track_info.kind() == proto::TrackKind::KIND_AUDIO) { + remote_track = std::make_shared(owned_track); + } else { + std::cerr << "track_subscribed with unsupported kind: " + << track_info.kind() << "\n"; + break; + } + std::cout << "before setTrack " << std::endl; + + // Attach to publication, mark subscribed + rpublication->setTrack(remote_track); + std::cout << "setTrack " << std::endl; + rpublication->setSubscribed(true); + std::cout << "setSubscribed " << std::endl; + } + + // Emit remote track_subscribed-style callback + TrackSubscribedEvent ev; + ev.track = remote_track; + ev.publication = rpublication; + ev.participant = rparticipant; + std::cout << "onTrackSubscribed " << std::endl; delegate_snapshot->onTrackSubscribed(*this, ev); + std::cout << "after onTrackSubscribed " << std::endl; break; } case proto::RoomEvent::kTrackUnsubscribed: { diff --git a/src/room_event_converter.cpp b/src/room_event_converter.cpp index 651896f..bb1846c 100644 --- a/src/room_event_converter.cpp +++ b/src/room_event_converter.cpp @@ -260,18 +260,6 @@ LocalTrackSubscribedEvent fromProto(const proto::LocalTrackSubscribed &src) { return ev; } -TrackPublishedEvent fromProto(const proto::TrackPublished &src) { - TrackPublishedEvent ev; - ev.participant_identity = src.participant_identity(); - // OwnedTrackPublication publication = 2; - // TODO: map publication info once you inspect OwnedTrackPublication - // ev.publication_sid = src.publication().info().sid(); - // ev.track_name = src.publication().info().name(); - // ev.track_kind = ...; - // ev.track_source = ...; - return ev; -} - TrackUnpublishedEvent fromProto(const proto::TrackUnpublished &src) { TrackUnpublishedEvent ev; ev.participant_identity = src.participant_identity(); @@ -279,18 +267,6 @@ TrackUnpublishedEvent fromProto(const proto::TrackUnpublished &src) { return ev; } -TrackSubscribedEvent fromProto(const proto::TrackSubscribed &src) { - TrackSubscribedEvent ev; - ev.participant_identity = src.participant_identity(); - // OwnedTrack track = 2; - // TODO: map track info once you inspect OwnedTrack - // ev.track_sid = src.track().info().sid(); - // ev.track_name = src.track().info().name(); - // ev.track_kind = ...; - // ev.track_source = ...; - return ev; -} - TrackUnsubscribedEvent fromProto(const proto::TrackUnsubscribed &src) { TrackUnsubscribedEvent ev; ev.participant_identity = src.participant_identity(); diff --git a/src/room_event_converter.h b/src/room_event_converter.h index 96d9c39..f8dde1f 100644 --- a/src/room_event_converter.h +++ b/src/room_event_converter.h @@ -52,7 +52,6 @@ LocalTrackSubscribedEvent fromProto(const proto::LocalTrackSubscribed &src); TrackPublishedEvent fromProto(const proto::TrackPublished &src); TrackUnpublishedEvent fromProto(const proto::TrackUnpublished &src); -TrackSubscribedEvent fromProto(const proto::TrackSubscribed &src); TrackUnsubscribedEvent fromProto(const proto::TrackUnsubscribed &src); 
TrackSubscriptionFailedEvent fromProto(const proto::TrackSubscriptionFailed &src); diff --git a/src/room_proto_converter.cpp b/src/room_proto_converter.cpp index 5423f07..0c41a6a 100644 --- a/src/room_proto_converter.cpp +++ b/src/room_proto_converter.cpp @@ -269,18 +269,6 @@ TrackUnpublishedEvent fromProto(const proto::TrackUnpublished &in) { return ev; } -TrackSubscribedEvent fromProto(const proto::TrackSubscribed &in) { - TrackSubscribedEvent ev; - ev.participant_identity = in.participant_identity(); - // OwnedTrack track = 2; - // TODO: map track info once you inspect OwnedTrack - // ev.track_sid = in.track().info().sid(); - // ev.track_name = in.track().info().name(); - // ev.track_kind = ...; - // ev.track_source = ...; - return ev; -} - TrackUnsubscribedEvent fromProto(const proto::TrackUnsubscribed &in) { TrackUnsubscribedEvent ev; ev.participant_identity = in.participant_identity(); diff --git a/src/room_proto_converter.h b/src/room_proto_converter.h index eafba5a..89ba7c4 100644 --- a/src/room_proto_converter.h +++ b/src/room_proto_converter.h @@ -51,7 +51,6 @@ LocalTrackSubscribedEvent fromProto(const proto::LocalTrackSubscribed &in); TrackPublishedEvent fromProto(const proto::TrackPublished &in); TrackUnpublishedEvent fromProto(const proto::TrackUnpublished &in); -TrackSubscribedEvent fromProto(const proto::TrackSubscribed &in); TrackUnsubscribedEvent fromProto(const proto::TrackUnsubscribed &in); TrackSubscriptionFailedEvent fromProto(const proto::TrackSubscriptionFailed &in); diff --git a/src/video_frame.cpp b/src/video_frame.cpp index a56e7f2..c06bff4 100644 --- a/src/video_frame.cpp +++ b/src/video_frame.cpp @@ -264,6 +264,9 @@ computePlaneInfos(uintptr_t base, int width, int height, VideoBufferType type) { // LKVideoFrame implementation // ---------------------------------------------------------------------------- +LKVideoFrame::LKVideoFrame() + : width_{0}, height_{0}, type_{VideoBufferType::BGRA}, data_{} {} + LKVideoFrame::LKVideoFrame(int width, int height, VideoBufferType type, std::vector data) : width_(width), height_(height), type_(type), data_(std::move(data)) { @@ -310,8 +313,7 @@ LKVideoFrame LKVideoFrame::fromOwnedInfo(const proto::OwnedVideoBuffer &owned) { const auto &info = owned.info(); const int width = static_cast(info.width()); const int height = static_cast(info.height()); - // Assuming your C++ enum matches proto's underlying values. 
- const VideoBufferType type = static_cast(info.type()); + const VideoBufferType type = fromProto(info.type()); std::vector buffer; diff --git a/src/video_stream.cpp b/src/video_stream.cpp index 19b1903..7847920 100644 --- a/src/video_stream.cpp +++ b/src/video_stream.cpp @@ -14,30 +14,22 @@ using proto::FfiEvent; using proto::FfiRequest; using proto::VideoStreamEvent; -// ------------------------ -// Factory helpers -// ------------------------ - -std::unique_ptr +std::shared_ptr VideoStream::fromTrack(const std::shared_ptr &track, const Options &options) { - auto stream = std::unique_ptr(new VideoStream()); + auto stream = std::shared_ptr(new VideoStream()); stream->initFromTrack(track, options); return stream; } -std::unique_ptr +std::shared_ptr VideoStream::fromParticipant(Participant &participant, TrackSource track_source, const Options &options) { - auto stream = std::unique_ptr(new VideoStream()); + auto stream = std::shared_ptr(new VideoStream()); stream->initFromParticipant(participant, track_source, options); return stream; } -// ------------------------ -// Destructor / move -// ------------------------ - VideoStream::~VideoStream() { close(); } VideoStream::VideoStream(VideoStream &&other) noexcept { @@ -77,31 +69,76 @@ VideoStream &VideoStream::operator=(VideoStream &&other) noexcept { return *this; } -// ------------------------ -// Init internals -// ------------------------ +// --------------------- Public API --------------------- + +bool VideoStream::read(VideoFrameEvent &out) { + std::unique_lock lock(mutex_); + + cv_.wait(lock, [this] { return !queue_.empty() || eof_ || closed_; }); + + if (closed_ || (queue_.empty() && eof_)) { + return false; // EOS / closed + } + + out = std::move(queue_.front()); + queue_.pop_front(); + return true; +} + +void VideoStream::close() { + std::cout << "VideoStream::close() \n"; + { + std::lock_guard lock(mutex_); + if (closed_) { + return; + } + closed_ = true; + } + + // Dispose FFI handle + if (stream_handle_.get() != 0) { + stream_handle_.reset(); + } + + // Remove listener + if (listener_id_ != 0) { + FfiClient::instance().RemoveListener(listener_id_); + listener_id_ = 0; + } + + // Wake any waiting readers + cv_.notify_all(); +} + +// --------------------- Internal helpers --------------------- void VideoStream::initFromTrack(const std::shared_ptr &track, const Options &options) { capacity_ = options.capacity; - // 1) Subscribe to FFI events + // Subscribe to FFI events; this is essential for receiving video frames from FFI.
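+  // Note: the event listener is registered before the new_video_stream
+  // request below is sent, so frame/EOS events that arrive immediately after
+  // stream creation are not missed.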
listener_id_ = FfiClient::instance().AddListener( [this](const proto::FfiEvent &e) { this->onFfiEvent(e); }); - // 2) Send FFI request to create a new video stream bound to this track + // Send FFI request to create a new video stream bound to this track FfiRequest req; auto *new_video_stream = req.mutable_new_video_stream(); - new_video_stream->set_track_handle(track->ffi_handle_id()); + new_video_stream->set_track_handle( + static_cast(track->ffi_handle_id())); new_video_stream->set_type(proto::VideoStreamType::VIDEO_STREAM_NATIVE); new_video_stream->set_normalize_stride(true); new_video_stream->set_format(toProto(options.format)); auto resp = FfiClient::instance().sendRequest(req); + if (!resp.has_new_video_stream()) { + std::cerr << "VideoStream::initFromTrack: FFI response missing " + "new_video_stream()\n"; + throw std::runtime_error("new_video_stream FFI request failed"); + } // Adjust field names to match your proto exactly: const auto &stream = resp.new_video_stream().stream(); stream_handle_ = FfiHandle(static_cast(stream.handle().id())); - // stream.info() is available if you want to cache metadata. + // TODO, do we need to cache the metadata from stream.info ? } void VideoStream::initFromParticipant(Participant &participant, @@ -129,52 +166,6 @@ void VideoStream::initFromParticipant(Participant &participant, stream_handle_ = FfiHandle(static_cast(stream.handle().id())); } -// ------------------------ -// Public API -// ------------------------ - -bool VideoStream::read(VideoFrameEvent &out) { - std::unique_lock lock(mutex_); - - cv_.wait(lock, [this] { return !queue_.empty() || eof_ || closed_; }); - - if (closed_ || (queue_.empty() && eof_)) { - return false; // EOS / closed - } - - out = std::move(queue_.front()); - queue_.pop_front(); - return true; -} - -void VideoStream::close() { - { - std::lock_guard lock(mutex_); - if (closed_) { - return; - } - closed_ = true; - } - - // Dispose FFI handle - if (stream_handle_.get() != 0) { - stream_handle_.reset(); - } - - // Remove listener - if (listener_id_ != 0) { - FfiClient::instance().RemoveListener(listener_id_); - listener_id_ = 0; - } - - // Wake any waiting readers - cv_.notify_all(); -} - -// ------------------------ -// Internal helpers -// ------------------------ - void VideoStream::onFfiEvent(const proto::FfiEvent &event) { // Filter for video_stream_event first. 
if (event.message_case() != FfiEvent::kVideoStreamEvent) { @@ -195,7 +186,6 @@ void VideoStream::onFfiEvent(const proto::FfiEvent &event) { VideoFrameEvent ev{std::move(frame), fr.timestamp_us(), static_cast(fr.rotation())}; - pushFrame(std::move(ev)); } else if (vse.has_eos()) { pushEos(); @@ -221,14 +211,17 @@ void VideoStream::pushFrame(VideoFrameEvent &&ev) { } void VideoStream::pushEos() { + std::cout << "pushEos 1" << std::endl; { std::lock_guard lock(mutex_); if (eof_) { + std::cout << "pushEos 2" << std::endl; return; } eof_ = true; } cv_.notify_all(); + std::cout << "pushEos 3" << std::endl; } } // namespace livekit From cf34bcfe66efc0166f456e98788c37c580163c36 Mon Sep 17 00:00:00 2001 From: shijing xian Date: Tue, 2 Dec 2025 09:58:52 -0800 Subject: [PATCH 4/5] fix the comments and the build --- include/livekit/audio_stream.h | 57 +++++++++++++++------------- include/livekit/local_audio_track.h | 43 +++++++++++++++++---- include/livekit/local_video_track.h | 43 +++++++++++++++++---- include/livekit/remote_audio_track.h | 22 ++++++++--- include/livekit/remote_video_track.h | 20 ++++++++-- include/livekit/room_delegate.h | 1 + src/local_audio_track.cpp | 3 +- src/local_video_track.cpp | 3 +- 8 files changed, 141 insertions(+), 51 deletions(-) diff --git a/include/livekit/audio_stream.h b/include/livekit/audio_stream.h index 3d405b5..3616e8d 100644 --- a/include/livekit/audio_stream.h +++ b/include/livekit/audio_stream.h @@ -39,52 +39,55 @@ struct AudioFrameEvent { AudioFrame frame; }; -// Represents a pull-based stream of decoded PCM audio frames coming from -// a remote (or local) LiveKit track. Similar to VideoStream, but for audio. -// -// Typical usage: -// -// AudioStream::Options opts; -// auto stream = AudioStream::fromTrack(remoteAudioTrack, opts); -// -// AudioFrameEvent ev; -// while (stream->read(ev)) { -// // ev.frame contains interleaved int16 PCM samples -// } -// -// stream->close(); // optional, called automatically in destructor +/** + * Represents a pull-based stream of decoded PCM audio frames coming from + * a remote (or local) LiveKit track. Similar to VideoStream, but for audio. + * + * Typical usage: + * + * AudioStream::Options opts; + * auto stream = AudioStream::fromTrack(remoteAudioTrack, opts); + * + * AudioFrameEvent ev; + * while (stream->read(ev)) { + * // ev.frame contains interleaved int16 PCM samples + * } + * + * stream->close(); // optional, called automatically in destructor + */ class AudioStream { public: - // Configuration options for AudioStream creation. + /// Configuration options for AudioStream creation. struct Options { - // Maximum number of AudioFrameEvent items buffered in the internal queue. - // 0 means "unbounded" (the queue can grow without limit). - // - // Using a small non-zero capacity gives ring-buffer semantics: - // if the queue is full, the oldest frame is dropped when a new one arrives. + /// Maximum number of AudioFrameEvent items buffered in the internal queue. + /// 0 means "unbounded" (the queue can grow without limit). + /// + /// Using a small non-zero capacity gives ring-buffer semantics: + /// if the queue is full, the oldest frame is dropped when a new one + /// arrives. std::size_t capacity{0}; - // Optional: name of a noise cancellation module to enable for this stream. - // Empty string means "no noise cancellation". + /// Optional: name of a noise cancellation module to enable for this stream. + /// Empty string means "no noise cancellation". 
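+    /// Example (sketch; the module name and JSON payload are hypothetical and
+    /// depend on which noise-cancellation plugin, if any, is built in):
+    ///
+    ///   AudioStream::Options opts;
+    ///   opts.noise_cancellation_module = "my_nc_module";
+    ///   opts.noise_cancellation_options_json = "{\"strength\": 0.8}";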
std::string noise_cancellation_module; - // Optional: JSON-encoded configuration for the noise cancellation module. - // Empty string means "use module defaults". + /// Optional: JSON-encoded configuration for the noise cancellation module. + /// Empty string means "use module defaults". std::string noise_cancellation_options_json; }; - // Factory: create an AudioStream bound to a specific Track + /// Factory: create an AudioStream bound to a specific Track static std::shared_ptr fromTrack(const std::shared_ptr &track, const Options &options); - // Factory: create an AudioStream from a Participant + TrackSource + /// Factory: create an AudioStream from a Participant + TrackSource static std::shared_ptr fromParticipant(Participant &participant, TrackSource track_source, const Options &options); ~AudioStream(); - // No copy, assignment constructors. + /// No copy, assignment constructors. AudioStream(const AudioStream &) = delete; AudioStream &operator=(const AudioStream &) = delete; AudioStream(AudioStream &&) noexcept; diff --git a/include/livekit/local_audio_track.h b/include/livekit/local_audio_track.h index 66cc52b..1d99c0a 100644 --- a/include/livekit/local_audio_track.h +++ b/include/livekit/local_audio_track.h @@ -28,25 +28,54 @@ class OwnedTrack; class AudioSource; -// ============================================================ -// LocalAudioTrack -// ============================================================ +/** + * Represents a user-provided audio track sourced from the local device. + * + * `LocalAudioTrack` is used to publish microphone audio (or any custom + * audio source) to a LiveKit room. It wraps a platform-specific audio + * source and exposes simple controls such as `mute()` and `unmute()`. + * + * Typical usage: + * + * auto source = AudioSource::create(...); + * auto track = LocalAudioTrack::createLocalAudioTrack("mic", source); + * room->localParticipant()->publishTrack(track); + * + * Muting a local audio track stops transmitting audio to the room, but + * the underlying source may continue capturing depending on platform + * behavior. + * + * The track name provided during creation is visible to remote + * participants and can be used for debugging or UI display. + */ class LocalAudioTrack : public Track { public: - explicit LocalAudioTrack(FfiHandle handle, const proto::OwnedTrack &track); - + /// Creates a new local audio track backed by the given `AudioSource`. + /// + /// @param name Human-readable name for the track. This may appear to + /// remote participants and in analytics/debug logs. + /// @param source The audio source that produces PCM frames for this track. + /// + /// @return A shared pointer to the newly constructed `LocalAudioTrack`. static std::shared_ptr createLocalAudioTrack(const std::string &name, const std::shared_ptr &source); - // Mute/unmute + /// Mutes the audio track. + /// + /// A muted track stops sending audio to the room, but the track remains + /// published and can be unmuted later without renegotiation. void mute(); + + /// Unmutes the audio track and resumes sending audio to the room. void unmute(); + /// Returns a human-readable string representation of the track, + /// including its SID and name. Useful for debugging and logging. 
std::string to_string() const; private: - // Optional: you may add private helpers if needed + explicit LocalAudioTrack(FfiHandle handle, const proto::OwnedTrack &track); }; } // namespace livekit \ No newline at end of file diff --git a/include/livekit/local_video_track.h b/include/livekit/local_video_track.h index 63b710a..2e0e22f 100644 --- a/include/livekit/local_video_track.h +++ b/include/livekit/local_video_track.h @@ -28,25 +28,54 @@ class OwnedTrack; class VideoSource; -// ============================================================ -// LocalAudioTrack -// ============================================================ +/** + * Represents a user-provided video track sourced from the local device. + * + * `LocalVideoTrack` is used to publish camera video (or any custom + * video source) to a LiveKit room. It wraps a platform-specific video + * source and exposes simple controls such as `mute()` and `unmute()`. + * + * Typical usage: + * + * auto source = VideoSource::create(...); + * auto track = LocalVideoTrack::createLocalVideoTrack("cam", source); + * room->localParticipant()->publishTrack(track); + * + * Muting a local video track stops transmitting video to the room, but + * the underlying source may continue capturing depending on platform + * behavior. + * + * The track name provided during creation is visible to remote + * participants and can be used for debugging or UI display. + */ class LocalVideoTrack : public Track { public: - explicit LocalVideoTrack(FfiHandle handle, const proto::OwnedTrack &track); - + /// Creates a new local video track backed by the given `VideoSource`. + /// + /// @param name Human-readable name for the track. This may appear to + /// remote participants and in analytics/debug logs. + /// @param source The video source that produces video frames for this track. + /// + /// @return A shared pointer to the newly constructed `LocalVideoTrack`. static std::shared_ptr createLocalVideoTrack(const std::string &name, const std::shared_ptr &source); - // Mute/unmute + /// Mutes the video track. + /// + /// A muted track stops sending video to the room, but the track remains + /// published and can be unmuted later without renegotiation. void mute(); + + /// Unmutes the video track and resumes sending video to the room. void unmute(); + /// Returns a human-readable string representation of the track, + /// including its SID and name. Useful for debugging and logging. std::string to_string() const; private: - // Optional: you may add private helpers if needed + explicit LocalVideoTrack(FfiHandle handle, const proto::OwnedTrack &track); }; } // namespace livekit \ No newline at end of file diff --git a/include/livekit/remote_audio_track.h b/include/livekit/remote_audio_track.h index 99e4c8e..572e62c 100644 --- a/include/livekit/remote_audio_track.h +++ b/include/livekit/remote_audio_track.h @@ -28,16 +28,28 @@ class OwnedTrack; class AudioSource; -// ============================================================ -// RemoteAudioTrack -// ============================================================ +/** + * Represents an audio track published by a remote participant and + * subscribed to by the local participant. + * + * `RemoteAudioTrack` instances are created internally when the SDK receives a + * `kTrackSubscribed` event. Each instance is owned by its associated + * `RemoteParticipant` and delivered to the application via + * `TrackSubscribedEvent`. 
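+ *
+ * Typical usage (sketch; `MyDelegate` and the exact callback signature are
+ * illustrative):
+ *
+ *   void MyDelegate::onTrackSubscribed(Room &room,
+ *                                      const TrackSubscribedEvent &ev) {
+ *     // For audio publications, pull decoded PCM via AudioStream:
+ *     AudioStream::Options opts;
+ *     auto stream = AudioStream::fromTrack(ev.track, opts);
+ *     // read AudioFrameEvents from `stream` on a worker thread
+ *   }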
+ * + * Applications generally interact with `RemoteAudioTrack` through events and + * `RemoteTrackPublication`, not through direct construction. + */ class RemoteAudioTrack : public Track { public: + /// Constructs a `RemoteAudioTrack` from an internal protocol-level + /// `OwnedTrack` description provided by the signaling/FFI layer. + /// This constructor is intended for internal SDK use only. explicit RemoteAudioTrack(const proto::OwnedTrack &track); + /// Returns a concise, human-readable string summarizing the track, + /// including its SID and name. Useful for debugging and logging. std::string to_string() const; - -private: }; } // namespace livekit \ No newline at end of file diff --git a/include/livekit/remote_video_track.h b/include/livekit/remote_video_track.h index c6abfce..ff65c4c 100644 --- a/include/livekit/remote_video_track.h +++ b/include/livekit/remote_video_track.h @@ -28,13 +28,27 @@ class OwnedTrack; class VideoSource; -// ============================================================ -// RemoteVideoTrack -// ============================================================ +/** + * Represents a video track published by a remote participant and + * subscribed to by the local participant. + * + * `RemoteVideoTrack` instances are created internally when the SDK receives a + * `kTrackSubscribed` event. Each instance is owned by its associated + * `RemoteParticipant` and delivered to the application via + * `TrackSubscribedEvent`. + * + * Applications generally interact with `RemoteVideoTrack` through events and + * `RemoteTrackPublication`, not through direct construction. + */ class RemoteVideoTrack : public Track { public: + /// Constructs a `RemoteVideoTrack` from an internal protocol-level + /// `OwnedTrack` description provided by the signaling/FFI layer. + /// This constructor is intended for internal SDK use only. explicit RemoteVideoTrack(const proto::OwnedTrack &track); + /// Returns a concise, human-readable string summarizing the track, + /// including its SID and name. Useful for debugging and logging.
std::string to_string() const; private: diff --git a/include/livekit/room_delegate.h b/include/livekit/room_delegate.h index 9073654..0e73fae 100644 --- a/include/livekit/room_delegate.h +++ b/include/livekit/room_delegate.h @@ -18,6 +18,7 @@ #include #include +#include #include #include #include diff --git a/src/local_audio_track.cpp b/src/local_audio_track.cpp index 663d0e3..02d73b0 100644 --- a/src/local_audio_track.cpp +++ b/src/local_audio_track.cpp @@ -41,7 +41,8 @@ std::shared_ptr LocalAudioTrack::createLocalAudioTrack( proto::FfiResponse resp = FfiClient::instance().sendRequest(req); const proto::OwnedTrack &owned = resp.create_audio_track().track(); FfiHandle handle(static_cast(owned.handle().id())); - return std::make_shared(std::move(handle), owned); + return std::shared_ptr( + new LocalAudioTrack(std::move(handle), owned)); } void LocalAudioTrack::mute() { diff --git a/src/local_video_track.cpp b/src/local_video_track.cpp index 455176a..7294a99 100644 --- a/src/local_video_track.cpp +++ b/src/local_video_track.cpp @@ -41,7 +41,8 @@ std::shared_ptr LocalVideoTrack::createLocalVideoTrack( proto::FfiResponse resp = FfiClient::instance().sendRequest(req); const proto::OwnedTrack &owned = resp.create_video_track().track(); FfiHandle handle(static_cast(owned.handle().id())); - return std::make_shared(std::move(handle), owned); + return std::shared_ptr( + new LocalVideoTrack(std::move(handle), owned)); } void LocalVideoTrack::mute() { From 7141457ad49aaf35f4a99f68adc6853abeb43956 Mon Sep 17 00:00:00 2001 From: shijing xian Date: Tue, 2 Dec 2025 10:15:58 -0800 Subject: [PATCH 5/5] another try to fix the linux build --- examples/CMakeLists.txt | 3 +++ examples/simple_room/fallback_capture.cpp | 3 +++ examples/simple_room/main.cpp | 2 ++ 3 files changed, 8 insertions(+) diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index ef7076e..f242af3 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -1,6 +1,9 @@ cmake_minimum_required(VERSION 3.31.0) project (livekit-examples) +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake") include(sdl3) diff --git a/examples/simple_room/fallback_capture.cpp b/examples/simple_room/fallback_capture.cpp index 158b81c..e0c3c8c 100644 --- a/examples/simple_room/fallback_capture.cpp +++ b/examples/simple_room/fallback_capture.cpp @@ -16,6 +16,9 @@ #include "fallback_capture.h" +#include +#include + #include "livekit/livekit.h" #include "wav_audio_source.h" diff --git a/examples/simple_room/main.cpp b/examples/simple_room/main.cpp index dd23091..e675a2f 100644 --- a/examples/simple_room/main.cpp +++ b/examples/simple_room/main.cpp @@ -18,7 +18,9 @@ #include #include #include +#include #include +#include #include #include #include