From cefade9c2e81b02696047b063657c459ecc8b272 Mon Sep 17 00:00:00 2001 From: shijing xian Date: Tue, 25 Nov 2025 09:30:25 -0800 Subject: [PATCH 1/5] initial commit for video stream and other hooks --- CMakeLists.txt | 2 + include/livekit/livekit.h | 1 + include/livekit/local_track_publication.h | 3 +- include/livekit/participant.h | 20 +- include/livekit/remote_participant.h | 60 +++++ include/livekit/remote_track_publication.h | 2 + include/livekit/room.h | 4 + include/livekit/track_publication.h | 2 +- include/livekit/video_stream.h | 86 ++++++++ src/remote_participant.cpp | 48 ++++ src/room.cpp | 135 +++++++++++- src/room_proto_converter.cpp | 10 +- src/video_stream.cpp | 241 +++++++++++++++++++++ 13 files changed, 604 insertions(+), 10 deletions(-) create mode 100644 include/livekit/remote_participant.h create mode 100644 include/livekit/video_stream.h create mode 100644 src/remote_participant.cpp create mode 100644 src/video_stream.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 07fcb34..9dab685 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -166,6 +166,7 @@ add_library(livekit include/livekit/remote_audio_track.h include/livekit/participant.h include/livekit/local_participant.h + include/livekit/remote_participant.h include/livekit/livekit.h include/livekit/stats.h include/livekit/track.h @@ -186,6 +187,7 @@ add_library(livekit src/room_proto_converter.cpp src/room_proto_converter.h src/local_participant.cpp + src/remote_participant.cpp src/stats.cpp src/track.cpp src/track_proto_converter.cpp diff --git a/include/livekit/livekit.h b/include/livekit/livekit.h index 23d63c4..4b5fc30 100644 --- a/include/livekit/livekit.h +++ b/include/livekit/livekit.h @@ -21,6 +21,7 @@ #include "local_track_publication.h" #include "local_video_track.h" #include "participant.h" +#include "remote_participant.h" #include "room.h" #include "room_delegate.h" #include "track_publication.h" diff --git a/include/livekit/local_track_publication.h b/include/livekit/local_track_publication.h index b53e4f5..3e8c6ed 100644 --- a/include/livekit/local_track_publication.h +++ b/include/livekit/local_track_publication.h @@ -28,7 +28,8 @@ class Track; class LocalTrackPublication : public TrackPublication { public: - /// Construct from an OwnedTrackPublication proto. + /// Note: this LocalTrackPublication is constructed internally only, + /// so it is safe to accept a proto::OwnedTrackPublication. explicit LocalTrackPublication(const proto::OwnedTrackPublication &owned); /// Typed accessor for the attached LocalTrack (if any).
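A minimal consumer sketch for the VideoStream API that the video_stream.h diff below introduces, assuming a std::shared_ptr<Track> obtained from a subscribed remote publication; consumeVideo, the capacity value, and the logging are illustrative placeholders rather than SDK code.

#include <iostream>
#include <memory>

#include "livekit/video_stream.h"

// Sketch only: `track` would come from a subscribed RemoteTrackPublication.
void consumeVideo(const std::shared_ptr<livekit::Track> &track) {
  livekit::VideoStream::Options options;
  options.capacity = 4;  // keep at most 4 queued frames (0 = unbounded)

  auto stream = livekit::VideoStream::fromTrack(track, options);

  // Assumes VideoFrameEvent is default-constructible; otherwise seed it with a
  // placeholder frame before entering the loop.
  livekit::VideoFrameEvent ev;

  // read() blocks until a frame is queued, the stream reaches EOS, or close()
  // is called; it returns false once the stream has ended.
  while (stream->read(ev)) {
    std::cout << "frame at " << ev.timestamp_us << " us, rotation="
              << static_cast<int>(ev.rotation) << "\n";
  }

  stream->close();  // idempotent; the destructor also closes the stream
}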
diff --git a/include/livekit/participant.h b/include/livekit/participant.h index 826263b..a0003fe 100644 --- a/include/livekit/participant.h +++ b/include/livekit/participant.h @@ -41,7 +41,7 @@ class Participant { metadata_(std::move(metadata)), attributes_(std::move(attributes)), kind_(kind), reason_(reason) {} - // Plain getters/setters (caller ensures threading) + // Plain getters (caller ensures threading) const std::string &sid() const noexcept { return sid_; } const std::string &name() const noexcept { return name_; } const std::string &identity() const noexcept { return identity_; } @@ -55,6 +55,24 @@ class Participant { uintptr_t ffiHandleId() const noexcept { return handle_.get(); } + // Setters (caller ensures threading) + void set_name(std::string name) noexcept { name_ = std::move(name); } + void set_metadata(std::string metadata) noexcept { + metadata_ = std::move(metadata); + } + void + set_attributes(std::unordered_map attrs) noexcept { + attributes_ = std::move(attrs); + } + void set_attribute(const std::string &key, const std::string &value) { + attributes_[key] = value; + } + void remove_attribute(const std::string &key) { attributes_.erase(key); } + void set_kind(ParticipantKind kind) noexcept { kind_ = kind; } + void set_disconnect_reason(DisconnectReason reason) noexcept { + reason_ = reason; + } + private: FfiHandle handle_; std::string sid_, name_, identity_, metadata_; diff --git a/include/livekit/remote_participant.h b/include/livekit/remote_participant.h new file mode 100644 index 0000000..d1c1579 --- /dev/null +++ b/include/livekit/remote_participant.h @@ -0,0 +1,60 @@ +/* + * Copyright 2025 LiveKit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an “AS IS” BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "participant.h" + +#include +#include +#include + +namespace livekit { + +class RemoteTrackPublication; + +class RemoteParticipant : public Participant { +public: + using TrackPublicationMap = + std::unordered_map>; + + RemoteParticipant(FfiHandle handle, std::string sid, std::string name, + std::string identity, std::string metadata, + std::unordered_map attributes, + ParticipantKind kind, DisconnectReason reason); + + // A dictionary of track publications associated with the participant. + const TrackPublicationMap &track_publications() const noexcept { + return track_publications_; + } + + // Optional: non-const access if you want to mutate in-place. 
+ TrackPublicationMap &mutable_track_publications() noexcept { + return track_publications_; + } + + // C++ equivalent of Python's __repr__ + std::string to_string() const; + +private: + TrackPublicationMap track_publications_; +}; + +// Convenience for logging / streaming +std::ostream &operator<<(std::ostream &os, + const RemoteParticipant &participant); + +} // namespace livekit diff --git a/include/livekit/remote_track_publication.h b/include/livekit/remote_track_publication.h index 9066058..aa39408 100644 --- a/include/livekit/remote_track_publication.h +++ b/include/livekit/remote_track_publication.h @@ -28,6 +28,8 @@ class Track; class RemoteTrackPublication : public TrackPublication { public: + /// Note, this RemoteTrackPublication is constructed internally only; + /// safe to accept proto::OwnedTrackPublication. explicit RemoteTrackPublication(const proto::OwnedTrackPublication &owned); /// Typed accessor for the attached RemoteTrack (if any). diff --git a/include/livekit/room.h b/include/livekit/room.h index 54f0d99..98077ed 100644 --- a/include/livekit/room.h +++ b/include/livekit/room.h @@ -32,6 +32,7 @@ class FfiEvent; } class LocalParticipant; +class RemoteParticipant; class Room { public: @@ -43,6 +44,7 @@ class Room { // Accessors RoomInfoData room_info() const; LocalParticipant *local_participant() const; + RemoteParticipant *remote_participant(const std::string &identity) const; private: mutable std::mutex lock_; @@ -51,6 +53,8 @@ class Room { RoomInfoData room_info_; std::shared_ptr room_handle_; std::unique_ptr local_participant_; + std::unordered_map> + remote_participants_; void OnEvent(const proto::FfiEvent &event); }; diff --git a/include/livekit/track_publication.h b/include/livekit/track_publication.h index 9536437..8503188 100644 --- a/include/livekit/track_publication.h +++ b/include/livekit/track_publication.h @@ -22,7 +22,7 @@ #include #include "livekit/ffi_handle.h" -#include "livekit/track.h" // TrackKind, TrackSource, AudioTrackFeature +#include "livekit/track.h" namespace livekit { diff --git a/include/livekit/video_stream.h b/include/livekit/video_stream.h new file mode 100644 index 0000000..2008358 --- /dev/null +++ b/include/livekit/video_stream.h @@ -0,0 +1,86 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "ffi_handle.h" +#include "participant.h" +#include "track.h" +#include "video_frame.h" + +namespace livekit { + +// C++ equivalent of Python VideoFrameEvent +struct VideoFrameEvent { + VideoFrame frame; + std::int64_t timestamp_us; + VideoRotation rotation; +}; + +class VideoStream { +public: + struct Options { + std::size_t capacity{0}; // 0 = unbounded + std::optional format; // optional pixel format + }; + + // Factory: create a VideoStream bound to a specific Track + static std::unique_ptr + fromTrack(const std::shared_ptr &track, + const Options &options = Options{}); + + // Factory: create a VideoStream from a Participant + TrackSource + static std::unique_ptr + fromParticipant(Participant &participant, TrackSource track_source, + const Options &options = Options{}); + + ~VideoStream(); + + VideoStream(const VideoStream &) = delete; + VideoStream &operator=(const VideoStream &) = delete; + VideoStream(VideoStream &&) noexcept; + VideoStream &operator=(VideoStream &&) noexcept; + + /// Blocking read: returns true if a frame was delivered, + /// false if the stream has ended (EOS or closed). + bool read(VideoFrameEvent &out); + + /// Signal that we are no longer interested in frames. 
+ /// Disposes the underlying FFI stream and drains internal listener. + void close(); + +private: + VideoStream() = default; + + // Internal init helpers, used by the factories + void initFromTrack(const std::shared_ptr &track, + const Options &options); + void initFromParticipant(Participant &participant, TrackSource source, + const Options &options); + + // FFI event handler (registered with FfiClient) + void onFfiEvent(const FfiEvent &event); + + // Queue helpers + void pushFrame(VideoFrameEvent &&ev); + void pushEos(); + + mutable std::mutex mutex_; + std::condition_variable cv_; + std::deque queue_; + std::size_t capacity_{0}; + bool eof_{false}; + bool closed_{false}; + + // Underlying FFI handle for the video stream + FfiHandle stream_handle_; + + // Listener id registered on FfiClient + std::int64_t listener_id_{0}; +}; + +} // namespace livekit diff --git a/src/remote_participant.cpp b/src/remote_participant.cpp new file mode 100644 index 0000000..38def8e --- /dev/null +++ b/src/remote_participant.cpp @@ -0,0 +1,48 @@ +/* + * Copyright 2025 LiveKit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an “AS IS” BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "livekit/remote_participant.h" + +#include +#include +#include + +namespace livekit { + +RemoteParticipant::RemoteParticipant( + FfiHandle handle, std::string sid, std::string name, std::string identity, + std::string metadata, + std::unordered_map attributes, + ParticipantKind kind, DisconnectReason reason) + : Participant(std::move(handle), std::move(sid), std::move(name), + std::move(identity), std::move(metadata), + std::move(attributes), kind, reason), + track_publications_() {} + +std::string RemoteParticipant::to_string() const { + std::ostringstream oss; + oss << "rtc.RemoteParticipant(sid=" << sid() << ", identity=" << identity() + << ", name=" << name() << ")"; + return oss.str(); +} + +std::ostream &operator<<(std::ostream &os, + const RemoteParticipant &participant) { + os << participant.to_string(); + return os; +} + +} // namespace livekit diff --git a/src/room.cpp b/src/room.cpp index 4cfa3c0..9f56cdb 100644 --- a/src/room.cpp +++ b/src/room.cpp @@ -18,6 +18,9 @@ #include "livekit/ffi_client.h" #include "livekit/local_participant.h" +#include "livekit/local_track_publication.h" +#include "livekit/remote_participant.h" +#include "livekit/remote_track_publication.h" #include "livekit/room_delegate.h" #include "ffi.pb.h" @@ -36,6 +39,25 @@ using proto::FfiRequest; using proto::FfiResponse; using proto::RoomOptions; +namespace { + +std::unique_ptr +createRemoteParticipant(const proto::OwnedParticipant &owned) { + const auto &pinfo = owned.info(); + std::unordered_map attrs; + attrs.reserve(pinfo.attributes_size()); + for (const auto &kv : pinfo.attributes()) { + attrs.emplace(kv.first, kv.second); + } + auto kind = livekit::fromProto(pinfo.kind()); + auto reason = livekit::toDisconnectReason(pinfo.disconnect_reason()); + livekit::FfiHandle handle(static_cast(owned.handle().id())); + return std::make_unique( + std::move(handle), pinfo.sid(), 
pinfo.name(), pinfo.identity(), + pinfo.metadata(), std::move(attrs), kind, reason); +} + +} // namespace Room::Room() {} Room::~Room() {} @@ -87,10 +109,25 @@ bool Room::Connect(const std::string &url, const std::string &token) { std::move(participant_handle), pinfo.sid(), pinfo.name(), pinfo.identity(), pinfo.metadata(), std::move(attrs), kind, reason); } - // Setup remote particpants + // Setup remote participants { - // TODO, implement this remote participant feature + const auto &participants = connectCb.result().participants(); + std::lock_guard g(lock_); + for (const auto &pt : participants) { + const auto &owned = pt.participant(); + auto rp = createRemoteParticipant(owned); + // Add the initial remote participant tracks (like Python does) + for (const auto &owned_publication_info : pt.publications()) { + auto publication = + std::make_shared(owned_publication_info); + rp->mutable_track_publications().emplace(publication->sid(), + std::move(publication)); + } + + remote_participants_.emplace(rp->identity(), std::move(rp)); + } } + return true; } catch (const std::exception &e) { // On error, remove the listener and rethrow @@ -110,6 +147,12 @@ LocalParticipant *Room::local_participant() const { return local_participant_.get(); } +RemoteParticipant *Room::remote_participant(const std::string &identity) const { + std::lock_guard g(lock_); + auto it = remote_participants_.find(identity); + return it == remote_participants_.end() ? nullptr : it->second.get(); +} + void Room::OnEvent(const FfiEvent &event) { // Take a snapshot of the delegate under lock, but do NOT call it under the // lock. @@ -136,11 +179,37 @@ void Room::OnEvent(const FfiEvent &event) { switch (re.message_case()) { case proto::RoomEvent::kParticipantConnected: { auto ev = fromProto(re.participant_connected()); + std::cout << "kParticipantConnected " << std::endl; + // Create and register RemoteParticipant + { + std::lock_guard guard(lock_); + auto rp = createRemoteParticipant(re.participant_connected().info()); + remote_participants_.emplace(rp->identity(), std::move(rp)); + } + // TODO, use better public callback events delegate_snapshot->onParticipantConnected(*this, ev); + break; } case proto::RoomEvent::kParticipantDisconnected: { auto ev = fromProto(re.participant_disconnected()); + { + std::lock_guard guard(lock_); + const auto &pd = re.participant_disconnected(); + const std::string &identity = pd.participant_identity(); + auto it = remote_participants_.find(identity); + if (it != remote_participants_.end()) { + remote_participants_.erase(it); + } else { + // We saw a disconnect event for a participant we don't track + // internally. This can happen on races or if we never created a + // RemoteParticipant + std::cerr << "participant_disconnected for unknown identity: " + << identity << std::endl; + } + } + // TODO, should we trigger onParticipantDisconnected if remote + // participants can't be found ? 
delegate_snapshot->onParticipantDisconnected(*this, ev); break; } @@ -161,6 +230,28 @@ void Room::OnEvent(const FfiEvent &event) { } case proto::RoomEvent::kTrackPublished: { auto ev = fromProto(re.track_published()); + { + std::lock_guard guard(lock_); + const auto &tp = re.track_published(); + const std::string &identity = tp.participant_identity(); + auto it = remote_participants_.find(identity); + if (it != remote_participants_.end()) { + RemoteParticipant *rparticipant = it->second.get(); + const auto &owned_publication = tp.publication(); + auto rpublication = + std::make_shared(owned_publication); + // Store it on the participant, keyed by SID + rparticipant->mutable_track_publications().emplace( + rpublication->sid(), std::move(rpublication)); + + } else { + // Optional: log if we get a track for an unknown participant + std::cerr << "track_published for unknown participant: " << identity + << "\n"; + // Don't emit the + break; + } + } delegate_snapshot->onTrackPublished(*this, ev); break; } @@ -322,6 +413,46 @@ void Room::OnEvent(const FfiEvent &event) { } case proto::RoomEvent::kParticipantsUpdated: { auto ev = fromProto(re.participants_updated()); + { + std::lock_guard guard(lock_); + const auto &pu = re.participants_updated(); + for (const auto &info : pu.participants()) { + const std::string &identity = info.identity(); + Participant *participant = nullptr; + // First, check local participant. + if (local_participant_ && + identity == local_participant_->identity()) { + participant = local_participant_.get(); + } else { + // Otherwise, look for a remote participant. + auto it = remote_participants_.find(identity); + if (it != remote_participants_.end()) { + participant = it->second.get(); + } + } + + if (!participant) { + // Participant might not exist yet; ignore for now. + std::cerr << "Room::RoomEvent::kParticipantsUpdated participant " + "does not exist: " + << identity << std::endl; + continue; + } + + // Update basic fields + participant->set_name(info.name()); + participant->set_metadata(info.metadata()); + std::unordered_map attrs; + attrs.reserve(info.attributes_size()); + for (const auto &kv : info.attributes()) { + attrs.emplace(kv.first, kv.second); + } + participant->set_attributes(std::move(attrs)); + participant->set_kind(fromProto(info.kind())); + participant->set_disconnect_reason( + toDisconnectReason(info.disconnect_reason())); + } + } delegate_snapshot->onParticipantsUpdated(*this, ev); break; } diff --git a/src/room_proto_converter.cpp b/src/room_proto_converter.cpp index 1df125c..5423f07 100644 --- a/src/room_proto_converter.cpp +++ b/src/room_proto_converter.cpp @@ -215,12 +215,12 @@ DataStreamTrailerData fromProto(const proto::DataStream_Trailer &in) { // --------- event conversions --------- -ParticipantConnectedEvent -fromProto(const proto::ParticipantConnected & /*in*/) { +ParticipantConnectedEvent fromProto(const proto::ParticipantConnected &in) { ParticipantConnectedEvent ev; - // in.info() is OwnedParticipant; you can fill more fields once you inspect - // it. For now, leave metadata/name/identity as TODO. - // TODO: map in.info().info().identity(), name(), metadata(), etc. 
+ const auto &pinfo = in.info().info(); + ev.identity = pinfo.identity(); + ev.name = pinfo.name(); + ev.metadata = pinfo.metadata(); return ev; } diff --git a/src/video_stream.cpp b/src/video_stream.cpp new file mode 100644 index 0000000..42f28d9 --- /dev/null +++ b/src/video_stream.cpp @@ -0,0 +1,241 @@ +#include "video_stream.h" + +#include + +#include "ffi_client.h" +// Include your actual generated proto headers here: +#include "proto/ffi_rpc.pb.h" +#include "proto/video_frame.pb.h" + +namespace livekit { + +using proto::FfiEvent; +using proto::FfiRequest; +using proto::VideoStreamEvent; + +// ------------------------ +// Factory helpers +// ------------------------ + +std::unique_ptr +VideoStream::fromTrack(const std::shared_ptr &track, + const Options &options) { + auto stream = std::unique_ptr(new VideoStream()); + stream->initFromTrack(track, options); + return stream; +} + +std::unique_ptr +VideoStream::fromParticipant(Participant &participant, TrackSource track_source, + const Options &options) { + auto stream = std::unique_ptr(new VideoStream()); + stream->initFromParticipant(participant, track_source, options); + return stream; +} + +// ------------------------ +// Destructor / move +// ------------------------ + +VideoStream::~VideoStream() { close(); } + +VideoStream::VideoStream(VideoStream &&other) noexcept { + std::lock_guard lock(other.mutex_); + queue_ = std::move(other.queue_); + capacity_ = other.capacity_; + eof_ = other.eof_; + closed_ = other.closed_; + stream_handle_ = std::move(other.stream_handle_); + listener_id_ = other.listener_id_; + + other.listener_id_ = 0; + other.closed_ = true; +} + +VideoStream &VideoStream::operator=(VideoStream &&other) noexcept { + if (this == &other) + return *this; + + close(); + + { + std::lock_guard lock_this(mutex_); + std::lock_guard lock_other(other.mutex_); + + queue_ = std::move(other.queue_); + capacity_ = other.capacity_; + eof_ = other.eof_; + closed_ = other.closed_; + stream_handle_ = std::move(other.stream_handle_); + listener_id_ = other.listener_id_; + + other.listener_id_ = 0; + other.closed_ = true; + } + + return *this; +} + +// ------------------------ +// Init internals +// ------------------------ + +void VideoStream::initFromTrack(const std::shared_ptr &track, + const Options &options) { + capacity_ = options.capacity; + + // 1) Subscribe to FFI events + listener_id_ = FfiClient::instance().AddListener( + [this](const FfiEvent &e) { this->onFfiEvent(e); }); + + // 2) Send FFI request to create a new video stream bound to this track + FfiRequest req; + auto *new_video_stream = req.mutable_new_video_stream(); + new_video_stream->set_track_handle(track->ffiHandleId()); + new_video_stream->set_type(proto::VideoStreamType::VIDEO_STREAM_NATIVE); + new_video_stream->set_normalize_stride(true); + if (options.format.has_value()) { + new_video_stream->set_format(static_cast(*options.format)); + } + + auto resp = FfiClient::instance().request(req); + // Adjust field names to match your proto exactly: + const auto &stream = resp.new_video_stream().stream(); + stream_handle_ = FfiHandle(static_cast(stream.handle().id())); + // stream.info() is available if you want to cache metadata. 
+} + +void VideoStream::initFromParticipant(Participant &participant, + TrackSource track_source, + const Options &options) { + capacity_ = options.capacity; + + // 1) Subscribe to FFI events + listener_id_ = FfiClient::instance().AddListener( + [this](const FfiEvent &e) { this->onFfiEvent(e); }); + + // 2) Send FFI request to create a video stream from participant + track + // source + FfiRequest req; + auto *vs = req.mutable_video_stream_from_participant(); + vs->set_participant_handle(participant.ffiHandleId()); + vs->set_type(proto::VideoStreamType::VIDEO_STREAM_NATIVE); + vs->set_track_source(static_cast(track_source)); + vs->set_normalize_stride(true); + if (options.format.has_value()) { + vs->set_format(static_cast(*options.format)); + } + + auto resp = FfiClient::instance().request(req); + // Adjust field names to match your proto exactly: + const auto &stream = resp.video_stream_from_participant().stream(); + stream_handle_ = FfiHandle(static_cast(stream.handle().id())); +} + +// ------------------------ +// Public API +// ------------------------ + +bool VideoStream::read(VideoFrameEvent &out) { + std::unique_lock lock(mutex_); + + cv_.wait(lock, [this] { return !queue_.empty() || eof_ || closed_; }); + + if (closed_ || (queue_.empty() && eof_)) { + return false; // EOS / closed + } + + out = std::move(queue_.front()); + queue_.pop_front(); + return true; +} + +void VideoStream::close() { + { + std::lock_guard lock(mutex_); + if (closed_) { + return; + } + closed_ = true; + } + + // Dispose FFI handle + if (stream_handle_.get() != 0) { + stream_handle_.dispose(); + } + + // Remove listener + if (listener_id_ != 0) { + FfiClient::instance().RemoveListener(listener_id_); + listener_id_ = 0; + } + + // Wake any waiting readers + cv_.notify_all(); +} + +// ------------------------ +// Internal helpers +// ------------------------ + +void VideoStream::onFfiEvent(const FfiEvent &event) { + // Filter for video_stream_event first. + if (event.message_case() != FfiEvent::kVideoStreamEvent) { + return; + } + + const auto &vse = event.video_stream_event(); + + // Check if this event is for our stream handle. + if (static_cast(vse.stream_handle().id()) != + stream_handle_.get()) { + return; + } + + // Handle frame_received or eos. + if (vse.has_frame_received()) { + const auto &fr = vse.frame_received(); + + // Convert owned buffer->VideoFrame via a helper. + // You should implement this static function in your VideoFrame class. + VideoFrame frame = VideoFrame::fromOwnedInfo(fr.buffer()); + + VideoFrameEvent ev{std::move(frame), fr.timestamp_us(), + static_cast(fr.rotation())}; + + pushFrame(std::move(ev)); + } else if (vse.has_eos()) { + pushEos(); + } +} + +void VideoStream::pushFrame(VideoFrameEvent &&ev) { + { + std::lock_guard lock(mutex_); + + if (closed_ || eof_) { + return; + } + + if (capacity_ > 0 && queue_.size() >= capacity_) { + // Ring behavior: drop oldest frame. 
+ queue_.pop_front(); + } + + queue_.push_back(std::move(ev)); + } + cv_.notify_one(); +} + +void VideoStream::pushEos() { + { + std::lock_guard lock(mutex_); + if (eof_) { + return; + } + eof_ = true; + } + cv_.notify_all(); +} + +} // namespace livekit From f872678866a9a83b2de01cbc1e585fa3066c7915 Mon Sep 17 00:00:00 2001 From: shijing xian Date: Tue, 25 Nov 2025 14:06:43 -0800 Subject: [PATCH 2/5] Get things functional --- CMakeLists.txt | 4 + include/livekit/audio_stream.h | 107 ++++++++++++++ include/livekit/video_frame.h | 20 +-- include/livekit/video_stream.h | 38 +++-- src/audio_stream.cpp | 254 +++++++++++++++++++++++++++++++++ src/video_frame.cpp | 85 +++++++---- src/video_stream.cpp | 41 +++--- src/video_utils.cpp | 20 +-- src/video_utils.h | 2 + 9 files changed, 485 insertions(+), 86 deletions(-) create mode 100644 include/livekit/audio_stream.h create mode 100644 src/audio_stream.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 9dab685..6baabde 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -158,6 +158,7 @@ add_custom_command( add_library(livekit include/livekit/audio_frame.h include/livekit/audio_source.h + include/livekit/audio_stream.h include/livekit/room.h include/livekit/room_delegate.h include/livekit/ffi_handle.h @@ -175,10 +176,12 @@ add_library(livekit include/livekit/remote_track_publication.h include/livekit/video_frame.h include/livekit/video_source.h + include/livekit/video_stream.h include/livekit/local_video_track.h include/livekit/remote_video_track.h src/audio_frame.cpp src/audio_source.cpp + src/audio_stream.cpp src/ffi_handle.cpp src/ffi_client.cpp src/local_audio_track.cpp @@ -197,6 +200,7 @@ add_library(livekit src/remote_track_publication.cpp src/video_frame.cpp src/video_source.cpp + src/video_stream.cpp src/local_video_track.cpp src/remote_video_track.cpp src/video_utils.cpp diff --git a/include/livekit/audio_stream.h b/include/livekit/audio_stream.h new file mode 100644 index 0000000..84aac8a --- /dev/null +++ b/include/livekit/audio_stream.h @@ -0,0 +1,107 @@ +/* + * Copyright 2025 LiveKit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an “AS IS” BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include "audio_frame.h" +#include "ffi_handle.h" +#include "participant.h" +#include "track.h" + +namespace livekit { + +namespace proto { +class FfiEvent; +} + +struct AudioFrameEvent { + AudioFrame frame; +}; + +class AudioStream { +public: + struct Options { + std::size_t capacity{0}; // 0 = unbounded + int sample_rate{48000}; + int num_channels{1}; + std::string noise_cancellation_module; // empty = disabled + std::string noise_cancellation_options_json; // empty = no options + }; + + // Factory: create an AudioStream bound to a specific Track + static std::unique_ptr + from_track(const std::shared_ptr &track, const Options &options); + + // Factory: create an AudioStream from a Participant + TrackSource + static std::unique_ptr from_participant(Participant &participant, + TrackSource track_source, + const Options &options); + + ~AudioStream(); + + AudioStream(const AudioStream &) = delete; + AudioStream &operator=(const AudioStream &) = delete; + AudioStream(AudioStream &&) noexcept; + AudioStream &operator=(AudioStream &&) noexcept; + + /// Blocking read: returns true if a frame was delivered, + /// false if the stream has ended (EOS or closed). + bool read(AudioFrameEvent &out_event); + + /// Signal that we are no longer interested in frames. + /// Disposes the underlying FFI stream and removes the listener. + void close(); + +private: + AudioStream() = default; + + void init_from_track(const std::shared_ptr &track, + const Options &options); + void init_from_participant(Participant &participant, TrackSource track_source, + const Options &options); + + // FFI event handler (registered with FfiClient) + void on_ffi_event(const proto::FfiEvent &event); + + // Queue helpers + void push_frame(AudioFrameEvent &&ev); + void push_eos(); + + mutable std::mutex mutex_; + std::condition_variable cv_; + std::deque queue_; + std::size_t capacity_{0}; + bool eof_{false}; + bool closed_{false}; + + Options options_; + + // Underlying FFI audio stream handle + FfiHandle stream_handle_; + + // Listener id registered on FfiClient + std::int64_t listener_id_{0}; +}; + +} // namespace livekit diff --git a/include/livekit/video_frame.h b/include/livekit/video_frame.h index 3ba4f16..a43dc21 100644 --- a/include/livekit/video_frame.h +++ b/include/livekit/video_frame.h @@ -44,6 +44,10 @@ struct VideoPlaneInfo { std::uint32_t size; // plane size in bytes }; +namespace proto { +class OwnedVideoBuffer; +} + /** * Public SDK representation of a video frame. * @@ -63,16 +67,6 @@ class LKVideoFrame { LKVideoFrame(LKVideoFrame &&) noexcept = default; LKVideoFrame &operator=(LKVideoFrame &&) noexcept = default; - /* LKVideoFrame(LKVideoFrame&& other) noexcept - : width_(other.width_), - height_(other.height_), - type_(other.type_), - data_(std::move(other.data_)) { - other.width_ = 0; - other.height_ = 0; - } - LKVideoFrame& operator=(LKVideoFrame&& other) noexcept;*/ - /** * Allocate a new frame with the correct buffer size for the given format. * Data is zero-initialized. @@ -123,6 +117,12 @@ class LKVideoFrame { */ LKVideoFrame convert(VideoBufferType dst, bool flip_y = false) const; +protected: + friend class VideoStream; + // Only internal classes (e.g., VideoStream) + // should construct frames directly from FFI buffers. 
+ static LKVideoFrame fromOwnedInfo(const proto::OwnedVideoBuffer &owned); + private: int width_; int height_; diff --git a/include/livekit/video_stream.h b/include/livekit/video_stream.h index 2008358..322ab1d 100644 --- a/include/livekit/video_stream.h +++ b/include/livekit/video_stream.h @@ -1,3 +1,19 @@ +/* + * Copyright 2025 LiveKit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an “AS IS” BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + #pragma once #include @@ -11,32 +27,36 @@ #include "participant.h" #include "track.h" #include "video_frame.h" +#include "video_source.h" namespace livekit { // C++ equivalent of Python VideoFrameEvent struct VideoFrameEvent { - VideoFrame frame; + LKVideoFrame frame; std::int64_t timestamp_us; VideoRotation rotation; }; +namespace proto { +class FfiEvent; +} + class VideoStream { public: struct Options { - std::size_t capacity{0}; // 0 = unbounded - std::optional format; // optional pixel format + std::size_t capacity{0}; // 0 = unbounded + VideoBufferType format; }; // Factory: create a VideoStream bound to a specific Track static std::unique_ptr - fromTrack(const std::shared_ptr &track, - const Options &options = Options{}); + fromTrack(const std::shared_ptr &track, const Options &options); // Factory: create a VideoStream from a Participant + TrackSource - static std::unique_ptr - fromParticipant(Participant &participant, TrackSource track_source, - const Options &options = Options{}); + static std::unique_ptr fromParticipant(Participant &participant, + TrackSource track_source, + const Options &options); ~VideoStream(); @@ -63,7 +83,7 @@ class VideoStream { const Options &options); // FFI event handler (registered with FfiClient) - void onFfiEvent(const FfiEvent &event); + void onFfiEvent(const proto::FfiEvent &event); // Queue helpers void pushFrame(VideoFrameEvent &&ev); diff --git a/src/audio_stream.cpp b/src/audio_stream.cpp new file mode 100644 index 0000000..975908f --- /dev/null +++ b/src/audio_stream.cpp @@ -0,0 +1,254 @@ +/* + * Copyright 2025 LiveKit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an “AS IS” BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "livekit/audio_stream.h" + +#include + +#include "audio_frame.pb.h" +#include "ffi.pb.h" +#include "livekit/ffi_client.h" +#include "livekit/track.h" + +namespace livekit { + +using proto::FfiEvent; +using proto::FfiRequest; + +// ------------------------ +// Factory helpers +// ------------------------ + +std::unique_ptr +AudioStream::from_track(const std::shared_ptr &track, + const Options &options) { + auto stream = std::unique_ptr(new AudioStream()); + stream->init_from_track(track, options); + return stream; +} + +std::unique_ptr +AudioStream::from_participant(Participant &participant, + TrackSource track_source, + const Options &options) { + auto stream = std::unique_ptr(new AudioStream()); + stream->init_from_participant(participant, track_source, options); + return stream; +} + +// ------------------------ +// Destructor / move +// ------------------------ + +AudioStream::~AudioStream() { close(); } + +AudioStream::AudioStream(AudioStream &&other) noexcept { + std::lock_guard lock(other.mutex_); + queue_ = std::move(other.queue_); + capacity_ = other.capacity_; + eof_ = other.eof_; + closed_ = other.closed_; + options_ = other.options_; + stream_handle_ = std::move(other.stream_handle_); + listener_id_ = other.listener_id_; + + other.listener_id_ = 0; + other.closed_ = true; +} + +AudioStream &AudioStream::operator=(AudioStream &&other) noexcept { + if (this == &other) { + return *this; + } + + close(); + + { + std::lock_guard lock_this(mutex_); + std::lock_guard lock_other(other.mutex_); + + queue_ = std::move(other.queue_); + capacity_ = other.capacity_; + eof_ = other.eof_; + closed_ = other.closed_; + options_ = other.options_; + stream_handle_ = std::move(other.stream_handle_); + listener_id_ = other.listener_id_; + + other.listener_id_ = 0; + other.closed_ = true; + } + + return *this; +} + +bool AudioStream::read(AudioFrameEvent &out_event) { + std::unique_lock lock(mutex_); + + cv_.wait(lock, [this] { return !queue_.empty() || eof_ || closed_; }); + + if (closed_ || (queue_.empty() && eof_)) { + return false; // EOS / closed + } + + out_event = std::move(queue_.front()); + queue_.pop_front(); + return true; +} + +void AudioStream::close() { + { + std::lock_guard lock(mutex_); + if (closed_) { + return; + } + closed_ = true; + } + + // Dispose FFI handle + if (stream_handle_.get() != 0) { + stream_handle_.reset(); + } + + // Remove listener + if (listener_id_ != 0) { + FfiClient::instance().RemoveListener(listener_id_); + listener_id_ = 0; + } + + // Wake any waiting readers + cv_.notify_all(); +} + +// Internal functions + +void AudioStream::init_from_track(const std::shared_ptr &track, + const Options &options) { + capacity_ = options.capacity; + options_ = options; + + // 1) Subscribe to FFI events + listener_id_ = FfiClient::instance().AddListener( + [this](const FfiEvent &e) { this->on_ffi_event(e); }); + + // 2) Send FfiRequest to create a new audio stream bound to this track + FfiRequest req; + auto *new_audio_stream = req.mutable_new_audio_stream(); + new_audio_stream->set_track_handle( + static_cast(track->ffi_handle_id())); + new_audio_stream->set_sample_rate(options_.sample_rate); + new_audio_stream->set_num_channels(options.num_channels); + new_audio_stream->set_type(proto::AudioStreamType::AUDIO_STREAM_NATIVE); + + if (!options_.noise_cancellation_module.empty()) { + new_audio_stream->set_audio_filter_module_id( + options_.noise_cancellation_module); + // Always set options JSON even if empty — backend will treat empty string + // as “no 
options” + new_audio_stream->set_audio_filter_options( + options_.noise_cancellation_options_json); + } + + auto resp = FfiClient::instance().sendRequest(req); + const auto &stream = resp.new_audio_stream().stream(); + stream_handle_ = FfiHandle(static_cast(stream.handle().id())); +} + +void AudioStream::init_from_participant(Participant &participant, + TrackSource track_source, + const Options &options) { + capacity_ = options.capacity; + options_ = options; + + // 1) Subscribe to FFI events + listener_id_ = FfiClient::instance().AddListener( + [this](const FfiEvent &e) { this->on_ffi_event(e); }); + + // 2) Send FfiRequest to create audio stream from participant + track source + FfiRequest req; + auto *as = req.mutable_audio_stream_from_participant(); + as->set_participant_handle(participant.ffiHandleId()); + as->set_sample_rate(options_.sample_rate); + as->set_num_channels(options_.num_channels); + as->set_type(proto::AudioStreamType::AUDIO_STREAM_NATIVE); + as->set_track_source(static_cast(track_source)); + + if (!options_.noise_cancellation_module.empty()) { + as->set_audio_filter_module_id(options_.noise_cancellation_module); + // Always set options JSON even if empty — backend will treat empty string + // as “no options” + as->set_audio_filter_options(options_.noise_cancellation_options_json); + } + + auto resp = FfiClient::instance().sendRequest(req); + const auto &stream = resp.audio_stream_from_participant().stream(); + stream_handle_ = FfiHandle(static_cast(stream.handle().id())); +} + +void AudioStream::on_ffi_event(const FfiEvent &event) { + if (event.message_case() != FfiEvent::kAudioStreamEvent) { + return; + } + + const auto &ase = event.audio_stream_event(); + // Check if this event is for our stream handle. + if (ase.stream_handle() != static_cast(stream_handle_.get())) { + return; + } + if (ase.has_frame_received()) { + const auto &fr = ase.frame_received(); + + // Convert owned buffer -> AudioFrame via helper. + // Implement AudioFrame::fromOwnedInfo(...) to mirror Python's + // AudioFrame._from_owned_info. + AudioFrame frame = AudioFrame::fromOwnedInfo(fr.frame()); + AudioFrameEvent ev{std::move(frame)}; + push_frame(std::move(ev)); + } else if (ase.has_eos()) { + push_eos(); + } +} + +void AudioStream::push_frame(AudioFrameEvent &&ev) { + { + std::lock_guard lock(mutex_); + + if (closed_ || eof_) { + return; + } + + if (capacity_ > 0 && queue_.size() >= capacity_) { + // Ring behavior: drop oldest frame when full. + queue_.pop_front(); + } + + queue_.push_back(std::move(ev)); + } + cv_.notify_one(); +} + +void AudioStream::push_eos() { + { + std::lock_guard lock(mutex_); + if (eof_) { + return; + } + eof_ = true; + } + cv_.notify_all(); +} + +} // namespace livekit diff --git a/src/video_frame.cpp b/src/video_frame.cpp index badce04..a56e7f2 100644 --- a/src/video_frame.cpp +++ b/src/video_frame.cpp @@ -6,6 +6,7 @@ #include #include +#include "livekit/ffi_handle.h" #include "video_utils.h" namespace livekit { @@ -271,37 +272,7 @@ LKVideoFrame::LKVideoFrame(int width, int height, VideoBufferType type, throw std::invalid_argument("LKVideoFrame: provided data is too small for " "the specified format and size"); } - std::cout << "width_ is " << width_ << std::endl; - std::cout << "height_ is " << height_ << std::endl; } -/* -LKVideoFrame& LKVideoFrame::operator=(LKVideoFrame&& other) noexcept { - // 1. Self-assignment check - if (this == &other) { - return *this; - } - - // 2. 
Resource cleanup (The std::vector handles its own memory cleanup, - // but the simple members must be transferred.) - - // 3. Transfer simple members (width, height, type) - width_ = other.width_; - height_ = other.height_; - type_ = other.type_; - - // 4. Transfer complex resource (std::vector) - // std::move() is used to invoke std::vector's move assignment operator - data_ = std::move(other.data_); - - // 5. Optional: Reset the 'other' object to a valid but empty/default state. - // This is good practice for the object that has been moved-from. - other.width_ = 0; - other.height_ = 0; - // other.data_ is already empty after the move assignment - - // 6. Return reference to the assigned object - return *this; -}*/ LKVideoFrame LKVideoFrame::create(int width, int height, VideoBufferType type) { const std::size_t size = computeBufferSize(width, height, type); @@ -335,4 +306,58 @@ LKVideoFrame LKVideoFrame::convert(VideoBufferType dst, bool flip_y) const { return convertViaFfi(*this, dst, flip_y); } +LKVideoFrame LKVideoFrame::fromOwnedInfo(const proto::OwnedVideoBuffer &owned) { + const auto &info = owned.info(); + const int width = static_cast(info.width()); + const int height = static_cast(info.height()); + // Assuming your C++ enum matches proto's underlying values. + const VideoBufferType type = static_cast(info.type()); + + std::vector buffer; + + if (info.components_size() > 0) { + // Multi-plane (e.g. I420, NV12, etc.). We pack planes back-to-back. + std::size_t total_size = 0; + for (const auto &comp : info.components()) { + total_size += static_cast(comp.size()); + } + + buffer.resize(total_size); + std::size_t offset = 0; + for (const auto &comp : info.components()) { + const auto sz = static_cast(comp.size()); + const auto src_ptr = reinterpret_cast( + static_cast(comp.data_ptr())); + + std::memcpy(buffer.data() + offset, src_ptr, sz); + offset += sz; + } + } else { + // Packed format: treat top-level data_ptr as a single contiguous buffer. + const auto src_ptr = reinterpret_cast( + static_cast(info.data_ptr())); + + std::size_t total_size = 0; + if (info.has_stride()) { + // Use stride * height as total size (includes per-row padding if any). + total_size = static_cast(info.stride()) * + static_cast(height); + } else { + // Use our generic buffer-size helper (width/height/type). + total_size = computeBufferSize(width, height, type); + } + + buffer.resize(total_size); + std::memcpy(buffer.data(), src_ptr, total_size); + } + + // Release the FFI-owned buffer after copying the data. + { + FfiHandle owned_handle(static_cast(owned.handle().id())); + // owned_handle destroyed at end of scope → native buffer disposed. 
+ } + + return LKVideoFrame(width, height, type, std::move(buffer)); +} + } // namespace livekit diff --git a/src/video_stream.cpp b/src/video_stream.cpp index 42f28d9..19b1903 100644 --- a/src/video_stream.cpp +++ b/src/video_stream.cpp @@ -1,11 +1,12 @@ -#include "video_stream.h" +#include "livekit/video_stream.h" #include -#include "ffi_client.h" -// Include your actual generated proto headers here: -#include "proto/ffi_rpc.pb.h" -#include "proto/video_frame.pb.h" +#include "ffi.pb.h" +#include "livekit/ffi_client.h" +#include "livekit/track.h" +#include "video_frame.pb.h" +#include "video_utils.h" namespace livekit { @@ -86,19 +87,17 @@ void VideoStream::initFromTrack(const std::shared_ptr &track, // 1) Subscribe to FFI events listener_id_ = FfiClient::instance().AddListener( - [this](const FfiEvent &e) { this->onFfiEvent(e); }); + [this](const proto::FfiEvent &e) { this->onFfiEvent(e); }); // 2) Send FFI request to create a new video stream bound to this track FfiRequest req; auto *new_video_stream = req.mutable_new_video_stream(); - new_video_stream->set_track_handle(track->ffiHandleId()); + new_video_stream->set_track_handle(track->ffi_handle_id()); new_video_stream->set_type(proto::VideoStreamType::VIDEO_STREAM_NATIVE); new_video_stream->set_normalize_stride(true); - if (options.format.has_value()) { - new_video_stream->set_format(static_cast(*options.format)); - } + new_video_stream->set_format(toProto(options.format)); - auto resp = FfiClient::instance().request(req); + auto resp = FfiClient::instance().sendRequest(req); // Adjust field names to match your proto exactly: const auto &stream = resp.new_video_stream().stream(); stream_handle_ = FfiHandle(static_cast(stream.handle().id())); @@ -120,13 +119,11 @@ void VideoStream::initFromParticipant(Participant &participant, auto *vs = req.mutable_video_stream_from_participant(); vs->set_participant_handle(participant.ffiHandleId()); vs->set_type(proto::VideoStreamType::VIDEO_STREAM_NATIVE); - vs->set_track_source(static_cast(track_source)); + vs->set_track_source(static_cast(track_source)); vs->set_normalize_stride(true); - if (options.format.has_value()) { - vs->set_format(static_cast(*options.format)); - } + vs->set_format(toProto(options.format)); - auto resp = FfiClient::instance().request(req); + auto resp = FfiClient::instance().sendRequest(req); // Adjust field names to match your proto exactly: const auto &stream = resp.video_stream_from_participant().stream(); stream_handle_ = FfiHandle(static_cast(stream.handle().id())); @@ -161,7 +158,7 @@ void VideoStream::close() { // Dispose FFI handle if (stream_handle_.get() != 0) { - stream_handle_.dispose(); + stream_handle_.reset(); } // Remove listener @@ -178,27 +175,23 @@ void VideoStream::close() { // Internal helpers // ------------------------ -void VideoStream::onFfiEvent(const FfiEvent &event) { +void VideoStream::onFfiEvent(const proto::FfiEvent &event) { // Filter for video_stream_event first. if (event.message_case() != FfiEvent::kVideoStreamEvent) { return; } - const auto &vse = event.video_stream_event(); - // Check if this event is for our stream handle. - if (static_cast(vse.stream_handle().id()) != - stream_handle_.get()) { + if (vse.stream_handle() != static_cast(stream_handle_.get())) { return; } - // Handle frame_received or eos. if (vse.has_frame_received()) { const auto &fr = vse.frame_received(); // Convert owned buffer->VideoFrame via a helper. // You should implement this static function in your VideoFrame class. 
- VideoFrame frame = VideoFrame::fromOwnedInfo(fr.buffer()); + LKVideoFrame frame = LKVideoFrame::fromOwnedInfo(fr.buffer()); VideoFrameEvent ev{std::move(frame), fr.timestamp_us(), static_cast(fr.rotation())}; diff --git a/src/video_utils.cpp b/src/video_utils.cpp index be1470f..4f4c5c0 100644 --- a/src/video_utils.cpp +++ b/src/video_utils.cpp @@ -11,10 +11,7 @@ namespace livekit { -namespace { - -// Map SDK enum -> proto enum -proto::VideoBufferType toProtoBufferType(VideoBufferType t) { +proto::VideoBufferType toProto(VideoBufferType t) { switch (t) { case VideoBufferType::ARGB: return proto::VideoBufferType::ARGB; @@ -39,12 +36,12 @@ proto::VideoBufferType toProtoBufferType(VideoBufferType t) { case VideoBufferType::NV12: return proto::VideoBufferType::NV12; default: - throw std::runtime_error("Unknown VideoBufferType in toProtoBufferType"); + throw std::runtime_error("Unknown VideoBufferType in toProto"); } } // Map proto enum -> SDK enum -VideoBufferType fromProtoBufferType(proto::VideoBufferType t) { +VideoBufferType fromProto(proto::VideoBufferType t) { switch (t) { case proto::VideoBufferType::ARGB: return VideoBufferType::ARGB; @@ -69,13 +66,10 @@ VideoBufferType fromProtoBufferType(proto::VideoBufferType t) { case proto::VideoBufferType::NV12: return VideoBufferType::NV12; default: - throw std::runtime_error( - "Unknown proto::VideoBufferType in fromProtoBufferType"); + throw std::runtime_error("Unknown proto::VideoBufferType in fromProto"); } } -} // namespace - proto::VideoBufferInfo toProto(const LKVideoFrame &frame) { proto::VideoBufferInfo info; @@ -83,7 +77,7 @@ proto::VideoBufferInfo toProto(const LKVideoFrame &frame) { const int h = frame.height(); info.set_width(w); info.set_height(h); - info.set_type(toProtoBufferType(frame.type())); + info.set_type(toProto(frame.type())); // Backing data pointer for the whole buffer auto base_ptr = reinterpret_cast(frame.data()); @@ -123,7 +117,7 @@ LKVideoFrame fromOwnedProto(const proto::OwnedVideoBuffer &owned) { const int width = static_cast(info.width()); const int height = static_cast(info.height()); - const VideoBufferType type = fromProtoBufferType(info.type()); + const VideoBufferType type = fromProto(info.type()); // Allocate a new LKVideoFrame with the correct size/format LKVideoFrame frame = LKVideoFrame::create(width, height, type); @@ -154,7 +148,7 @@ LKVideoFrame convertViaFfi(const LKVideoFrame &frame, VideoBufferType dst, proto::FfiRequest req; auto *vc = req.mutable_video_convert(); vc->set_flip_y(flip_y); - vc->set_dst_type(toProtoBufferType(dst)); + vc->set_dst_type(toProto(dst)); vc->mutable_buffer()->CopyFrom(toProto(frame)); proto::FfiResponse resp = FfiClient::instance().sendRequest(req); diff --git a/src/video_utils.h b/src/video_utils.h index 5ece8f2..733e4a5 100644 --- a/src/video_utils.h +++ b/src/video_utils.h @@ -26,5 +26,7 @@ proto::VideoBufferInfo toProto(const LKVideoFrame &frame); LKVideoFrame fromOwnedProto(const proto::OwnedVideoBuffer &owned); LKVideoFrame convertViaFfi(const LKVideoFrame &frame, VideoBufferType dst, bool flip_y); +proto::VideoBufferType toProto(VideoBufferType t); +VideoBufferType fromProto(proto::VideoBufferType t); } // namespace livekit From 450c54390269647f1b987eaba164b9308da8f780 Mon Sep 17 00:00:00 2001 From: shijing xian Date: Sat, 29 Nov 2025 14:50:05 -0800 Subject: [PATCH 3/5] Implement the SDL media handling and hook up audio / video streams --- examples/CMakeLists.txt | 19 +- examples/cmake/sdl3.cmake | 14 + examples/simple_room/fallback_capture.cpp | 113 ++++ 
examples/simple_room/fallback_capture.h | 35 ++ examples/simple_room/main.cpp | 213 ++++---- examples/simple_room/sdl_media.cpp | 227 ++++++++ examples/simple_room/sdl_media.h | 128 +++++ examples/simple_room/sdl_media_manager.cpp | 557 ++++++++++++++++++++ examples/simple_room/sdl_media_manager.h | 109 ++++ examples/simple_room/sdl_video_renderer.cpp | 165 ++++++ examples/simple_room/sdl_video_renderer.h | 52 ++ include/livekit/audio_frame.h | 1 + include/livekit/audio_stream.h | 73 ++- include/livekit/ffi_client.h | 4 +- include/livekit/livekit.h | 5 +- include/livekit/local_participant.h | 3 +- include/livekit/participant.h | 1 - include/livekit/remote_audio_track.h | 6 +- include/livekit/remote_video_track.h | 6 +- include/livekit/room.h | 134 ++++- include/livekit/room_delegate.h | 11 +- include/livekit/stats.h | 4 - include/livekit/track.h | 3 +- include/livekit/track_publication.h | 7 +- include/livekit/video_frame.h | 6 +- include/livekit/video_stream.h | 48 +- src/audio_frame.cpp | 3 + src/audio_stream.cpp | 58 +- src/ffi_client.cpp | 57 +- src/remote_audio_track.cpp | 19 +- src/remote_video_track.cpp | 19 +- src/room.cpp | 66 ++- src/room_event_converter.cpp | 24 - src/room_event_converter.h | 1 - src/room_proto_converter.cpp | 12 - src/room_proto_converter.h | 1 - src/video_frame.cpp | 6 +- src/video_stream.cpp | 125 +++-- 38 files changed, 1996 insertions(+), 339 deletions(-) create mode 100644 examples/cmake/sdl3.cmake create mode 100644 examples/simple_room/fallback_capture.cpp create mode 100644 examples/simple_room/fallback_capture.h create mode 100644 examples/simple_room/sdl_media.cpp create mode 100644 examples/simple_room/sdl_media.h create mode 100644 examples/simple_room/sdl_media_manager.cpp create mode 100644 examples/simple_room/sdl_media_manager.h create mode 100644 examples/simple_room/sdl_video_renderer.cpp create mode 100644 examples/simple_room/sdl_video_renderer.h diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index c8d68a4..ef7076e 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -1,16 +1,31 @@ cmake_minimum_required(VERSION 3.31.0) project (livekit-examples) +list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake") +include(sdl3) + add_executable(SimpleRoom simple_room/main.cpp + simple_room/fallback_capture.cpp + simple_room/fallback_capture.h + simple_room/sdl_media.cpp + simple_room/sdl_media.h + simple_room/sdl_media_manager.cpp + simple_room/sdl_media_manager.h + simple_room/sdl_video_renderer.cpp + simple_room/sdl_video_renderer.h simple_room/wav_audio_source.cpp simple_room/wav_audio_source.h ) -target_link_libraries(SimpleRoom livekit) +target_link_libraries(SimpleRoom + PRIVATE + livekit + SDL3::SDL3 +) add_custom_command(TARGET SimpleRoom POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_SOURCE_DIR}/data ${CMAKE_CURRENT_BINARY_DIR}/data -) \ No newline at end of file +) diff --git a/examples/cmake/sdl3.cmake b/examples/cmake/sdl3.cmake new file mode 100644 index 0000000..8a1899e --- /dev/null +++ b/examples/cmake/sdl3.cmake @@ -0,0 +1,14 @@ +# cmake/sdl3.cmake +include(FetchContent) + +# Only fetch/build SDL3 once, even if this file is included multiple times +if (NOT TARGET SDL3::SDL3) + FetchContent_Declare( + SDL3 + GIT_REPOSITORY https://github.com/libsdl-org/SDL.git + GIT_TAG release-3.2.26 + ) + + FetchContent_MakeAvailable(SDL3) +endif() + diff --git a/examples/simple_room/fallback_capture.cpp b/examples/simple_room/fallback_capture.cpp new file mode 100644 index 0000000..158b81c 
--- /dev/null +++ b/examples/simple_room/fallback_capture.cpp @@ -0,0 +1,113 @@ +/* + * Copyright 2025 LiveKit, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "fallback_capture.h" + +#include "livekit/livekit.h" +#include "wav_audio_source.h" + +using namespace livekit; + +// Test utils to run a capture loop to publish noisy audio frames to the room +void runNoiseCaptureLoop(const std::shared_ptr &source, + std::atomic &running_flag) { + const int sample_rate = source->sample_rate(); + const int num_channels = source->num_channels(); + const int frame_ms = 10; + const int samples_per_channel = sample_rate * frame_ms / 1000; + + // FIX: variable name should not shadow the type + WavAudioSource wavSource("data/welcome.wav", 48000, 1, false); + + using Clock = std::chrono::steady_clock; + auto next_deadline = Clock::now(); + while (running_flag.load(std::memory_order_relaxed)) { + AudioFrame frame = + AudioFrame::create(sample_rate, num_channels, samples_per_channel); + wavSource.fillFrame(frame); + try { + source->captureFrame(frame); + } catch (const std::exception &e) { + std::cerr << "Error in captureFrame (noise): " << e.what() << std::endl; + break; + } + + // Pace the loop to roughly real-time + next_deadline += std::chrono::milliseconds(frame_ms); + std::this_thread::sleep_until(next_deadline); + } + + try { + source->clearQueue(); + } catch (...) { + std::cout << "Error in clearQueue (noise)" << std::endl; + } +} + +// Fake video source: solid color cycling +void runFakeVideoCaptureLoop(const std::shared_ptr &source, + std::atomic &running_flag) { + auto frame = LKVideoFrame::create(1280, 720, VideoBufferType::BGRA); + const double framerate = 1.0 / 30.0; + + while (running_flag.load(std::memory_order_relaxed)) { + static auto start = std::chrono::high_resolution_clock::now(); + float t = std::chrono::duration( + std::chrono::high_resolution_clock::now() - start) + .count(); + // Cycle every 4 seconds: 0=red, 1=green, 2=blue, 3=black + int stage = static_cast(t) % 4; + + std::array rgb{}; + switch (stage) { + case 0: // red + rgb = {255, 0, 0, 0}; + break; + case 1: // green + rgb = {0, 255, 0, 0}; + break; + case 2: // blue + rgb = {0, 0, 255, 0}; + break; + case 3: // black + default: + rgb = {0, 0, 0, 0}; + break; + } + + // ARGB + uint8_t *data = frame.data(); + const size_t size = frame.dataSize(); + for (size_t i = 0; i < size; i += 4) { + data[i + 0] = 255; // A + data[i + 1] = rgb[0]; // R + data[i + 2] = rgb[1]; // G + data[i + 3] = rgb[2]; // B + } + + try { + // If VideoSource is ARGB-capable, pass frame. + // If it expects I420, pass i420 instead. 
+ source->captureFrame(frame, 0, VideoRotation::VIDEO_ROTATION_0); + } catch (const std::exception &e) { + std::cerr << "Error in captureFrame (fake video): " << e.what() + << std::endl; + break; + } + + std::this_thread::sleep_for(std::chrono::duration(framerate)); + } +} diff --git a/examples/simple_room/fallback_capture.h b/examples/simple_room/fallback_capture.h new file mode 100644 index 0000000..a7d8536 --- /dev/null +++ b/examples/simple_room/fallback_capture.h @@ -0,0 +1,35 @@ +/* + * Copyright 2025 LiveKit, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +// Assuming you already have this somewhere: +extern std::atomic g_running; + +namespace livekit { +class AudioSource; +class VideoSource; +} // namespace livekit + +void runNoiseCaptureLoop(const std::shared_ptr &source, + std::atomic &running_flag); + +void runFakeVideoCaptureLoop( + const std::shared_ptr &source, + std::atomic &running_flag); diff --git a/examples/simple_room/main.cpp b/examples/simple_room/main.cpp index 85221c4..dd23091 100644 --- a/examples/simple_room/main.cpp +++ b/examples/simple_room/main.cpp @@ -1,3 +1,19 @@ +/* + * Copyright 2025 LiveKit, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + #include #include #include @@ -9,6 +25,7 @@ #include #include "livekit/livekit.h" +#include "sdl_media_manager.h" #include "wav_audio_source.h" // TODO(shijing), remove this livekit_ffi.h as it should be internal only. 
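The fallback loops above are meant to run on worker threads gated by a shared shutdown flag. A minimal sketch of how they can be driven, assuming the AudioSource/VideoSource objects are created as in main.cpp; runFallbackLoops and its parameters are illustrative names:

#include <atomic>
#include <memory>
#include <thread>

#include "fallback_capture.h"
#include "livekit/livekit.h"

// In the example this flag lives in main.cpp; it is defined here only to keep
// the sketch self-contained.
std::atomic<bool> g_running{true};

// Spawns both fallback loops and joins them once g_running is cleared
// (for example from a SIGINT handler).
void runFallbackLoops(const std::shared_ptr<livekit::AudioSource> &audio,
                      const std::shared_ptr<livekit::VideoSource> &video) {
  std::thread audio_thread(runNoiseCaptureLoop, audio, std::ref(g_running));
  std::thread video_thread(runFakeVideoCaptureLoop, video, std::ref(g_running));

  // ... publish tracks, wait for shutdown ...

  g_running.store(false, std::memory_order_relaxed);
  audio_thread.join();
  video_thread.join();
}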
@@ -103,8 +120,37 @@ bool parse_args(int argc, char *argv[], std::string &url, std::string &token) { return !(url.empty() || token.empty()); } +class MainThreadDispatcher { +public: + static void dispatch(std::function<void()> fn) { + std::lock_guard lock(mutex_); + queue_.push(std::move(fn)); + } + + static void update() { + std::queue<std::function<void()>> local; + + { + std::lock_guard lock(mutex_); + std::swap(local, queue_); + } + + // Run everything on main thread + while (!local.empty()) { + local.front()(); + local.pop(); + } + } + +private: + static inline std::mutex mutex_; + static inline std::queue<std::function<void()>> queue_; +}; + class SimpleRoomDelegate : public livekit::RoomDelegate { public: + explicit SimpleRoomDelegate(SDLMediaManager &media) : media_(media) {} + void onParticipantConnected( livekit::Room & /*room*/, const livekit::ParticipantConnectedEvent &ev) override { @@ -114,101 +160,54 @@ class SimpleRoomDelegate : public livekit::RoomDelegate { void onTrackSubscribed(livekit::Room & /*room*/, const livekit::TrackSubscribedEvent &ev) override { + const char *participant_identity = + ev.participant ? ev.participant->identity().c_str() : ""; + const std::string track_sid = + ev.publication ? ev.publication->sid() : ""; + const std::string track_name = + ev.publication ? ev.publication->name() : ""; std::cout << "[Room] track subscribed: participant_identity=" - << ev.participant_identity << " track_sid=" << ev.track_sid - << " name=" << ev.track_name << "\n"; - // TODO(shijing): when you expose Track kind/source here, you can check - // whether this is a video track and start a VideoStream-like consumer. Use - // the python code as reference. - } -}; - -// Test utils to run a capture loop to publish noisy audio frames to the room -void runNoiseCaptureLoop(const std::shared_ptr<AudioSource> &source) { - const int sample_rate = source->sample_rate(); - const int num_channels = source->num_channels(); - const int frame_ms = 10; - const int samples_per_channel = sample_rate * frame_ms / 1000; - - WavAudioSource WavAudioSource("data/welcome.wav", 48000, 1, false); - using Clock = std::chrono::steady_clock; - auto next_deadline = Clock::now(); - while (g_running.load(std::memory_order_relaxed)) { - AudioFrame frame = - AudioFrame::create(sample_rate, num_channels, samples_per_channel); - WavAudioSource.fillFrame(frame); - try { - source->captureFrame(frame); - } catch (const std::exception &e) { - // If something goes wrong, log and break out - std::cerr << "Error in captureFrame: " << e.what() << std::endl; - break; + << participant_identity << " track_sid=" << track_sid + << " name=" << track_name; + if (ev.track) { + std::cout << " kind=" << static_cast<int>(ev.track->kind()) << "\n"; + } + if (ev.publication) { + std::cout << " source=" << static_cast<int>(ev.publication->source()) + << "\n"; + } - // Pace the loop to roughly real-time - next_deadline += std::chrono::milliseconds(frame_ms); - std::this_thread::sleep_until(next_deadline); - } - - // Optionally clear queued audio on exit - try { - source->clearQueue(); - } catch (...)
{ - // ignore errors on shutdown - std::cout << "Error in clearQueue" << std::endl; - } -} + // If this is a VIDEO track, create a VideoStream and attach to renderer + if (ev.track && ev.track->kind() == TrackKind::KIND_VIDEO) { + VideoStream::Options opts; + opts.format = livekit::VideoBufferType::RGBA; + auto video_stream = VideoStream::fromTrack(ev.track, opts); + std::cout << "after fromTrack " << std::endl; + if (!video_stream) { + std::cerr << "Failed to create VideoStream for track " << track_sid + << "\n"; + return; + } -void runFakeVideoCaptureLoop(const std::shared_ptr &source) { - auto frame = LKVideoFrame::create(1280, 720, VideoBufferType::ARGB); - double framerate = 1.0 / 30; - while (g_running.load(std::memory_order_relaxed)) { - static auto start = std::chrono::high_resolution_clock::now(); - float t = std::chrono::duration( - std::chrono::high_resolution_clock::now() - start) - .count(); - // Cycle every 4 seconds: 0=red, 1=green, 2=blue, 3 black - int stage = static_cast(t) % 4; - std::vector rgb(4); - switch (stage) { - case 0: // red - rgb[0] = 255; - rgb[1] = 0; - rgb[2] = 0; - break; - case 1: // green - rgb[0] = 0; - rgb[1] = 255; - rgb[2] = 0; - break; - case 2: // blue - rgb[0] = 0; - rgb[1] = 0; - rgb[2] = 255; - break; - case 4: // black - rgb[0] = 0; - rgb[1] = 0; - rgb[2] = 0; + MainThreadDispatcher::dispatch([this, video_stream] { + if (!media_.initRenderer(video_stream)) { + std::cerr << "SDLMediaManager::startRenderer failed for track\n"; + } + }); + } else if (ev.track && ev.track->kind() == TrackKind::KIND_AUDIO) { + AudioStream::Options opts; + auto audio_stream = AudioStream::fromTrack(ev.track, opts); + MainThreadDispatcher::dispatch([this, audio_stream] { + if (!media_.startSpeaker(audio_stream)) { + std::cerr << "SDLMediaManager::startRenderer failed for track\n"; + } + }); } - for (size_t i = 0; i < frame.dataSize(); i += 4) { - frame.data()[i] = 255; - frame.data()[i + 1] = rgb[0]; - frame.data()[i + 2] = rgb[1]; - frame.data()[i + 3] = rgb[2]; - } - LKVideoFrame i420 = convertViaFfi(frame, VideoBufferType::I420, false); - try { - source->captureFrame(frame, 0, VideoRotation::VIDEO_ROTATION_0); - } catch (const std::exception &e) { - // If something goes wrong, log and break out - std::cerr << "Error in captureFrame: " << e.what() << std::endl; - break; - } - - std::this_thread::sleep_for(std::chrono::duration(framerate)); } -} + +private: + SDLMediaManager &media_; +}; } // namespace @@ -225,16 +224,28 @@ int main(int argc, char *argv[]) { return 1; } + if (!SDL_Init(SDL_INIT_VIDEO)) { + std::cerr << "SDL_Init(SDL_INIT_VIDEO) failed: " << SDL_GetError() << "\n"; + // You can choose to exit, or run in "headless" mode without renderer. 
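MainThreadDispatcher above marshals work from SDK callback threads (where onTrackSubscribed runs) back onto the main loop, since SDL windows and renderers must be driven from the main thread. A usage sketch under those assumptions; handleVideoStream is an illustrative name:

// Called from a delegate callback on a non-main thread.
void handleVideoStream(SDLMediaManager &media,
                       const std::shared_ptr<livekit::VideoStream> &stream) {
  MainThreadDispatcher::dispatch([&media, stream] {
    // Runs later, on the main thread, when update() drains the queue.
    media.initRenderer(stream);
  });
}

// Main loop on the main thread, as in main() below:
//   while (g_running.load()) {
//     MainThreadDispatcher::update();  // run queued work
//     media.render();                  // draw the latest remote frame
//     std::this_thread::sleep_for(std::chrono::milliseconds(10));
//   }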
+ // return 1; + } + + // Setup media; + SDLMediaManager media; + std::cout << "Connecting to: " << url << std::endl; // Handle Ctrl-C to exit the idle loop std::signal(SIGINT, handle_sigint); livekit::Room room{}; - SimpleRoomDelegate delegate; + SimpleRoomDelegate delegate(media); room.setDelegate(&delegate); - bool res = room.Connect(url, token); + RoomOptions options; + options.auto_subscribe = true; + options.dynacast = false; + bool res = room.Connect(url, token, options); std::cout << "Connect result is " << std::boolalpha << res << std::endl; if (!res) { std::cerr << "Failed to connect to room\n"; @@ -287,9 +298,7 @@ int main(int argc, char *argv[]) { std::cerr << "Failed to publish track: " << e.what() << std::endl; } - // TODO, if we have pre-buffering feature, we might consider starting the - // thread right after creating the source. - std::thread audioThread(runNoiseCaptureLoop, audioSource); + media.startMic(audioSource); // Setup Video Source / Track auto videoSource = std::make_shared(1280, 720); @@ -316,24 +325,24 @@ int main(int argc, char *argv[]) { } catch (const std::exception &e) { std::cerr << "Failed to publish track: " << e.what() << std::endl; } - std::thread videoThread(runFakeVideoCaptureLoop, videoSource); + media.startCamera(videoSource); // Keep the app alive until Ctrl-C so we continue receiving events, // similar to asyncio.run(main()) keeping the loop running. while (g_running.load()) { - std::this_thread::sleep_for(std::chrono::milliseconds(100)); + MainThreadDispatcher::update(); + media.render(); + std::this_thread::sleep_for(std::chrono::milliseconds(10)); } // Shutdown the audio thread. - if (audioThread.joinable()) { - audioThread.join(); - } + media.stopMic(); + // Clean up the audio track publishment room.local_participant()->unpublishTrack(audioPub->sid()); - if (videoThread.joinable()) { - videoThread.join(); - } + media.stopCamera(); + // Clean up the video track publishment room.local_participant()->unpublishTrack(videoPub->sid()); diff --git a/examples/simple_room/sdl_media.cpp b/examples/simple_room/sdl_media.cpp new file mode 100644 index 0000000..4961f51 --- /dev/null +++ b/examples/simple_room/sdl_media.cpp @@ -0,0 +1,227 @@ +/* + * Copyright 2025 LiveKit, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "sdl_media.h" + +#include + +// ---------------------- SDLMicSource ----------------------------- + +SDLMicSource::SDLMicSource(int sample_rate, int channels, int frame_samples, + AudioCallback cb) + : sample_rate_(sample_rate), channels_(channels), + frame_samples_(frame_samples), callback_(std::move(cb)) {} + +SDLMicSource::~SDLMicSource() { + if (stream_) { + SDL_DestroyAudioStream(stream_); + stream_ = nullptr; + } +} + +bool SDLMicSource::init() { + // desired output (what SDL will give us when we call SDL_GetAudioStreamData) + SDL_zero(spec_); + spec_.format = SDL_AUDIO_S16; // 16-bit signed + spec_.channels = static_cast(channels_); + spec_.freq = sample_rate_; + + // Open default recording device as an audio stream + // This works for both playback and recording, depending on the device id. + stream_ = SDL_OpenAudioDeviceStream( + SDL_AUDIO_DEVICE_DEFAULT_RECORDING, // recording device + &spec_, + nullptr, // no callback, we'll poll + nullptr); + + if (!stream_) { + std::cerr << "Failed to open recording stream: " << SDL_GetError() << "\n"; + return false; + } + + if (!SDL_ResumeAudioStreamDevice(stream_)) { // unpause device + std::cerr << "Failed to resume recording device: " << SDL_GetError() + << "\n"; + return false; + } + + return true; +} + +void SDLMicSource::pump() { + if (!stream_ || !callback_) + return; + + const int samples_per_frame_total = frame_samples_ * channels_; + const int bytes_per_frame = samples_per_frame_total * sizeof(int16_t); + + // Only pull if at least one "frame" worth of audio is available + const int available = SDL_GetAudioStreamAvailable(stream_); // bytes + if (available < bytes_per_frame) { + return; + } + + std::vector buffer(samples_per_frame_total); + + const int got_bytes = SDL_GetAudioStreamData(stream_, buffer.data(), + bytes_per_frame); // + + if (got_bytes <= 0) { + return; // nothing or error (log if you like) + } + + const int got_samples_total = got_bytes / sizeof(int16_t); + const int got_samples_per_channel = got_samples_total / channels_; + + callback_(buffer.data(), got_samples_per_channel, sample_rate_, channels_); +} + +void SDLMicSource::pause() { + if (stream_) { + SDL_PauseAudioStreamDevice(stream_); // + } +} + +void SDLMicSource::resume() { + if (stream_) { + SDL_ResumeAudioStreamDevice(stream_); // + } +} + +// ---------------------- DDLSpeakerSink ----------------------------- + +DDLSpeakerSink::DDLSpeakerSink(int sample_rate, int channels) + : sample_rate_(sample_rate), channels_(channels) {} + +DDLSpeakerSink::~DDLSpeakerSink() { + if (stream_) { + SDL_DestroyAudioStream(stream_); // also closes device + stream_ = nullptr; + } +} + +bool DDLSpeakerSink::init() { + SDL_zero(spec_); + spec_.format = SDL_AUDIO_S16; // expect S16 input for playback + spec_.channels = static_cast(channels_); + spec_.freq = sample_rate_; + + // Open default playback device as a stream. 
+ stream_ = SDL_OpenAudioDeviceStream(SDL_AUDIO_DEVICE_DEFAULT_PLAYBACK, &spec_, + nullptr, // no callback; we'll push data + nullptr); + + if (!stream_) { + std::cerr << "Failed to open playback stream: " << SDL_GetError() << "\n"; + return false; + } + + if (!SDL_ResumeAudioStreamDevice(stream_)) { + std::cerr << "Failed to resume playback device: " << SDL_GetError() << "\n"; + return false; + } + + return true; +} + +void DDLSpeakerSink::enqueue(const int16_t *samples, + int num_samples_per_channel) { + if (!stream_ || !samples) + return; + + const int totalSamples = num_samples_per_channel * channels_; + const int bytes = totalSamples * static_cast(sizeof(int16_t)); + + // SDL will resample / convert as needed on SDL_GetAudioStreamData() side. + if (!SDL_PutAudioStreamData(stream_, samples, bytes)) { + std::cerr << "SDL_PutAudioStreamData failed: " << SDL_GetError() << "\n"; + } +} + +void DDLSpeakerSink::pause() { + if (stream_) { + SDL_PauseAudioStreamDevice(stream_); + } +} + +void DDLSpeakerSink::resume() { + if (stream_) { + SDL_ResumeAudioStreamDevice(stream_); + } +} + +// ---------------------- SDLCamSource ----------------------------- + +SDLCamSource::SDLCamSource(int desired_width, int desired_height, + int desired_fps, SDL_PixelFormat pixel_format, + VideoCallback cb) + : width_(desired_width), height_(desired_height), fps_(desired_fps), + format_(pixel_format), callback_(std::move(cb)) {} + +SDLCamSource::~SDLCamSource() { + if (camera_) { + SDL_CloseCamera(camera_); // + camera_ = nullptr; + } +} + +bool SDLCamSource::init() { + int count = 0; + SDL_CameraID *cams = SDL_GetCameras(&count); // + if (!cams || count == 0) { + std::cerr << "No cameras available: " << SDL_GetError() << "\n"; + if (cams) + SDL_free(cams); + return false; + } + + SDL_CameraID camId = cams[0]; // first camera for now + SDL_free(cams); + + SDL_zero(spec_); + spec_.format = format_; + spec_.colorspace = SDL_COLORSPACE_SRGB; + spec_.width = width_; + spec_.height = height_; + spec_.framerate_numerator = fps_; + spec_.framerate_denominator = 1; + + camera_ = SDL_OpenCamera(camId, &spec_); + if (!camera_) { + std::cerr << "Failed to open camera: " << SDL_GetError() << "\n"; + return false; + } + + // On many platforms you must wait for SDL_EVENT_CAMERA_DEVICE_APPROVED; + // here we assume the app’s main loop is already handling that. + return true; +} + +void SDLCamSource::pump() { + if (!camera_ || !callback_) + return; + + Uint64 tsNS = 0; + SDL_Surface *surf = SDL_AcquireCameraFrame(camera_, &tsNS); // non-blocking + if (!surf) { + return; + } + + callback_(static_cast(surf->pixels), surf->pitch, surf->w, surf->h, + surf->format, tsNS); + + SDL_ReleaseCameraFrame(camera_, surf); // +} diff --git a/examples/simple_room/sdl_media.h b/examples/simple_room/sdl_media.h new file mode 100644 index 0000000..a60bca6 --- /dev/null +++ b/examples/simple_room/sdl_media.h @@ -0,0 +1,128 @@ +/* + * Copyright 2025 LiveKit, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include +#include + +// ------------------------- +// SDLMicSource +// ------------------------- +// Periodically call pump() from your main loop or a capture thread. +// It will pull 10ms frames from the mic (by default) and pass them to the +// AudioCallback. +class SDLMicSource { +public: + using AudioCallback = std::function; + + SDLMicSource(int sample_rate = 48000, int channels = 1, + int frame_samples = 480, AudioCallback cb = nullptr); + + ~SDLMicSource(); + + // Initialize SDL audio stream for recording + bool init(); + + // Call regularly to pull mic data and send to callback. + void pump(); + + void pause(); + void resume(); + + bool isValid() const { return stream_ != nullptr; } + +private: + SDL_AudioStream *stream_ = nullptr; + SDL_AudioSpec spec_{}; + int sample_rate_; + int channels_; + int frame_samples_; + AudioCallback callback_; +}; + +// ------------------------- +// DDLSpeakerSink +// ------------------------- +// For remote audio: when you get a decoded PCM frame, +// call enqueue() with interleaved S16 samples. +class DDLSpeakerSink { +public: + DDLSpeakerSink(int sample_rate = 48000, int channels = 1); + + ~DDLSpeakerSink(); + + bool init(); + + // Enqueue interleaved S16 samples for playback. + void enqueue(const int16_t *samples, int num_samples_per_channel); + + void pause(); + void resume(); + + bool isValid() const { return stream_ != nullptr; } + +private: + SDL_AudioStream *stream_ = nullptr; + SDL_AudioSpec spec_{}; + int sample_rate_; + int channels_; +}; + +// ------------------------- +// SDLCamSource +// ------------------------- +// Periodically call pump(); each time a new frame is available +// it will invoke the VideoCallback with the raw pixels. +// +// NOTE: pixels are in the SDL_Surface format returned by the camera +// (often SDL_PIXELFORMAT_ARGB8888). You can either: +// - convert to whatever your LiveKit video source expects, or +// - tell LiveKit that this is ARGB with the given stride. +class SDLCamSource { +public: + using VideoCallback = std::function; + + SDLCamSource(int desired_width = 1280, int desired_height = 720, + int desired_fps = 30, + SDL_PixelFormat pixelFormat = SDL_PIXELFORMAT_RGBA8888, + VideoCallback cb = nullptr); + + ~SDLCamSource(); + + bool init(); // open first available camera with (approximately) given spec + + // Call regularly; will call VideoCallback when a frame is available. + void pump(); + + bool isValid() const { return camera_ != nullptr; } + +private: + SDL_Camera *camera_ = nullptr; + SDL_CameraSpec spec_{}; + int width_; + int height_; + int fps_; + SDL_PixelFormat format_; + VideoCallback callback_; +}; diff --git a/examples/simple_room/sdl_media_manager.cpp b/examples/simple_room/sdl_media_manager.cpp new file mode 100644 index 0000000..58d3bd5 --- /dev/null +++ b/examples/simple_room/sdl_media_manager.cpp @@ -0,0 +1,557 @@ +/* + * Copyright 2025 LiveKit, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "sdl_media_manager.h" + +#include "fallback_capture.h" +#include "livekit/livekit.h" +#include "sdl_video_renderer.h" +#include +#include +#include + +// ---------------- SDLMicSource ---------------- + +class SDLMicSource { +public: + using AudioCallback = + std::function; + + SDLMicSource(int sample_rate, int channels, int frame_samples, + AudioCallback cb) + : sample_rate_(sample_rate), channels_(channels), + frame_samples_(frame_samples), callback_(std::move(cb)) {} + + ~SDLMicSource() { + if (stream_) { + SDL_DestroyAudioStream(stream_); + stream_ = nullptr; + } + } + + bool init() { + SDL_zero(spec_); + spec_.format = SDL_AUDIO_S16; + spec_.channels = static_cast(channels_); + spec_.freq = sample_rate_; + + stream_ = SDL_OpenAudioDeviceStream(SDL_AUDIO_DEVICE_DEFAULT_RECORDING, + &spec_, nullptr, nullptr); + + if (!stream_) { + std::cerr << "Failed to open recording stream: " << SDL_GetError() + << std::endl; + return false; + } + + if (!SDL_ResumeAudioStreamDevice(stream_)) { + std::cerr << "Failed to resume recording device: " << SDL_GetError() + << std::endl; + return false; + } + + return true; + } + + void pump() { + if (!stream_ || !callback_) + return; + + const int totalSamples = frame_samples_ * channels_; + const int bytes_per_frame = + totalSamples * static_cast(sizeof(int16_t)); + + const int available = SDL_GetAudioStreamAvailable(stream_); + if (available < bytes_per_frame) + return; + + std::vector buffer(totalSamples); + + const int got_bytes = + SDL_GetAudioStreamData(stream_, buffer.data(), bytes_per_frame); + if (got_bytes <= 0) + return; + + const int samplesTotal = got_bytes / static_cast(sizeof(int16_t)); + const int samplesPerChannel = samplesTotal / channels_; + + callback_(buffer.data(), samplesPerChannel, sample_rate_, channels_); + } + +private: + SDL_AudioStream *stream_ = nullptr; + SDL_AudioSpec spec_{}; + int sample_rate_; + int channels_; + int frame_samples_; + AudioCallback callback_; +}; + +// ---------------- SDLCamSource ---------------- + +class SDLCamSource { +public: + using VideoCallback = std::function; + + SDLCamSource(int desired_width, int desired_height, int desired_fps, + SDL_PixelFormat pixelFormat, VideoCallback cb) + : width_(desired_width), height_(desired_height), fps_(desired_fps), + format_(pixelFormat), callback_(std::move(cb)) {} + + ~SDLCamSource() { + if (camera_) { + SDL_CloseCamera(camera_); + camera_ = nullptr; + } + } + + bool init() { + int count = 0; + SDL_CameraID *cams = SDL_GetCameras(&count); + if (!cams || count == 0) { + std::cerr << "No camera devices found (SDL): " << SDL_GetError() + << std::endl; + if (cams) + SDL_free(cams); + return false; + } + + SDL_CameraID camId = cams[0]; + SDL_free(cams); + + SDL_zero(spec_); + spec_.format = format_; + spec_.width = width_; + spec_.height = height_; + spec_.framerate_numerator = fps_; + spec_.framerate_denominator = 1; + + camera_ = SDL_OpenCamera(camId, &spec_); + if (!camera_) { + std::cerr << "Failed to open camera: " << SDL_GetError() << std::endl; + return false; + } + + return true; + } + + void pump() { + if (!camera_ || !callback_) + return; + + Uint64 tsNS = 0; + SDL_Surface *surf = SDL_AcquireCameraFrame(camera_, &tsNS); + if (!surf) + return; + + callback_(static_cast(surf->pixels), surf->pitch, surf->w, + surf->h, surf->format, tsNS); + + SDL_ReleaseCameraFrame(camera_, surf); + } + +private: + SDL_Camera *camera_ = nullptr; + SDL_CameraSpec spec_{}; + int width_; + int height_; + int fps_; + SDL_PixelFormat format_; + VideoCallback 
callback_; +}; + +// ---------------- SDLMediaManager implementation ---------------- + +SDLMediaManager::SDLMediaManager() = default; + +SDLMediaManager::~SDLMediaManager() { + stopMic(); + stopCamera(); + stopSpeaker(); +} + +bool SDLMediaManager::ensureSDLInit(Uint32 flags) { + if ((SDL_WasInit(flags) & flags) == flags) { + return true; // already init + } + if (!SDL_InitSubSystem(flags)) { + std::cerr << "SDL_InitSubSystem failed (flags=" << flags + << "): " << SDL_GetError() << std::endl; + return false; + } + return true; +} + +// ---------- Mic control ---------- + +bool SDLMediaManager::startMic( + const std::shared_ptr &audio_source) { + stopMic(); + + if (!audio_source) { + std::cerr << "startMic: audioSource is null\n"; + return false; + } + + mic_source_ = audio_source; + mic_running_.store(true, std::memory_order_relaxed); + + // Try SDL path + if (!ensureSDLInit(SDL_INIT_AUDIO)) { + std::cerr << "No SDL audio, falling back to noise loop.\n"; + mic_using_sdl_ = false; + mic_thread_ = + std::thread(runNoiseCaptureLoop, mic_source_, std::ref(mic_running_)); + return true; + } + + int recCount = 0; + SDL_AudioDeviceID *recDevs = SDL_GetAudioRecordingDevices(&recCount); + if (!recDevs || recCount == 0) { + std::cerr << "No microphone devices found, falling back to noise loop.\n"; + if (recDevs) + SDL_free(recDevs); + mic_using_sdl_ = false; + mic_thread_ = + std::thread(runNoiseCaptureLoop, mic_source_, std::ref(mic_running_)); + return true; + } + SDL_free(recDevs); + + // We have at least one mic; use SDL + mic_using_sdl_ = true; + + mic_sdl_ = std::make_unique( + mic_source_->sample_rate(), mic_source_->num_channels(), + mic_source_->sample_rate() / 100, // ~10ms + [src = mic_source_](const int16_t *samples, int num_samples_per_channel, + int sample_rate, int num_channels) { + AudioFrame frame = AudioFrame::create(sample_rate, num_channels, + num_samples_per_channel); + std::memcpy(frame.data().data(), samples, + num_samples_per_channel * num_channels * sizeof(int16_t)); + try { + src->captureFrame(frame); + } catch (const std::exception &e) { + std::cerr << "Error in captureFrame (SDL mic): " << e.what() + << std::endl; + } + }); + + if (!mic_sdl_->init()) { + std::cerr << "Failed to init SDL mic, falling back to noise loop.\n"; + mic_using_sdl_ = false; + mic_sdl_.reset(); + mic_thread_ = + std::thread(runNoiseCaptureLoop, mic_source_, std::ref(mic_running_)); + return true; + } + + mic_thread_ = std::thread(&SDLMediaManager::micLoopSDL, this); + return true; +} + +void SDLMediaManager::micLoopSDL() { + while (mic_running_.load(std::memory_order_relaxed)) { + mic_sdl_->pump(); + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + } +} + +void SDLMediaManager::stopMic() { + mic_running_.store(false, std::memory_order_relaxed); + if (mic_thread_.joinable()) { + mic_thread_.join(); + } + mic_sdl_.reset(); + mic_source_.reset(); +} + +// ---------- Camera control ---------- + +bool SDLMediaManager::startCamera( + const std::shared_ptr &video_source) { + stopCamera(); + + if (!video_source) { + std::cerr << "startCamera: videoSource is null\n"; + return false; + } + + cam_source_ = video_source; + cam_running_.store(true, std::memory_order_relaxed); + + // Try SDL + if (!ensureSDLInit(SDL_INIT_CAMERA)) { + std::cerr << "No SDL camera subsystem, using fake video loop.\n"; + cam_using_sdl_ = false; + cam_thread_ = std::thread(runFakeVideoCaptureLoop, cam_source_, + std::ref(cam_running_)); + return true; + } + + int camCount = 0; + SDL_CameraID *cams = 
SDL_GetCameras(&camCount); + if (!cams || camCount == 0) { + std::cerr << "No camera devices found, using fake video loop.\n"; + if (cams) + SDL_free(cams); + cam_using_sdl_ = false; + cam_thread_ = std::thread(runFakeVideoCaptureLoop, cam_source_, + std::ref(cam_running_)); + return true; + } + SDL_free(cams); + + cam_using_sdl_ = true; + can_sdl_ = std::make_unique( + 1280, 720, 30, + SDL_PIXELFORMAT_RGBA32, // Note SDL_PIXELFORMAT_RGBA8888 is not compatable + // with Livekit RGBA format. + [src = cam_source_](const uint8_t *pixels, int pitch, int width, + int height, SDL_PixelFormat /*fmt*/, + Uint64 timestampNS) { + auto frame = LKVideoFrame::create(width, height, VideoBufferType::RGBA); + uint8_t *dst = frame.data(); + const int dstPitch = width * 4; + + for (int y = 0; y < height; ++y) { + std::memcpy(dst + y * dstPitch, pixels + y * pitch, dstPitch); + } + + try { + src->captureFrame(frame, timestampNS / 1000, + VideoRotation::VIDEO_ROTATION_0); + } catch (const std::exception &e) { + std::cerr << "Error in captureFrame (SDL cam): " << e.what() + << std::endl; + } + }); + + if (!can_sdl_->init()) { + std::cerr << "Failed to init SDL camera, using fake video loop.\n"; + cam_using_sdl_ = false; + can_sdl_.reset(); + cam_thread_ = std::thread(runFakeVideoCaptureLoop, cam_source_, + std::ref(cam_running_)); + return true; + } + + cam_thread_ = std::thread(&SDLMediaManager::cameraLoopSDL, this); + return true; +} + +void SDLMediaManager::cameraLoopSDL() { + while (cam_running_.load(std::memory_order_relaxed)) { + can_sdl_->pump(); + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + } +} + +void SDLMediaManager::stopCamera() { + cam_running_.store(false, std::memory_order_relaxed); + if (cam_thread_.joinable()) { + cam_thread_.join(); + } + can_sdl_.reset(); + cam_source_.reset(); +} + +// ---------- Speaker control (placeholder) ---------- + +bool SDLMediaManager::startSpeaker( + const std::shared_ptr &audio_stream) { + stopSpeaker(); + + if (!audio_stream) { + std::cerr << "startSpeaker: audioStream is null\n"; + return false; + } + + if (!ensureSDLInit(SDL_INIT_AUDIO)) { + std::cerr << "startSpeaker: SDL_INIT_AUDIO failed\n"; + return false; + } + + speaker_stream_ = audio_stream; + speaker_running_.store(true, std::memory_order_relaxed); + + // Note, we don't open the speaker since the format is unknown yet. + // Instead, open the speaker in the speakerLoopSDL thread with the native + // format. + try { + speaker_thread_ = std::thread(&SDLMediaManager::speakerLoopSDL, this); + } catch (const std::exception &e) { + std::cerr << "startSpeaker: failed to start speaker thread: " << e.what() + << "\n"; + speaker_running_.store(false, std::memory_order_relaxed); + speaker_stream_.reset(); + return false; + } + + return true; +} + +void SDLMediaManager::speakerLoopSDL() { + SDL_AudioStream *localStream = nullptr; + SDL_AudioDeviceID dev = 0; + + while (speaker_running_.load(std::memory_order_relaxed)) { + if (!speaker_stream_) { + break; + } + + livekit::AudioFrameEvent ev; + if (!speaker_stream_->read(ev)) { + // EOS or closed + break; + } + + const livekit::AudioFrame &frame = ev.frame; + const auto &data = frame.data(); + if (data.empty()) { + continue; + } + + // Lazily open SDL audio stream based on the first frame's format, so no + // resampler is needed. 
+ if (!localStream) { + SDL_AudioSpec want{}; + want.format = SDL_AUDIO_S16; + want.channels = static_cast(frame.num_channels()); + want.freq = frame.sample_rate(); + + localStream = + SDL_OpenAudioDeviceStream(SDL_AUDIO_DEVICE_DEFAULT_PLAYBACK, &want, + /*callback=*/nullptr, + /*userdata=*/nullptr); + + if (!localStream) { + std::cerr << "speakerLoopSDL: SDL_OpenAudioDeviceStream failed: " + << SDL_GetError() << "\n"; + break; + } + + sdl_audio_stream_ = localStream; // store if you want to inspect later + + dev = SDL_GetAudioStreamDevice(localStream); + if (dev == 0) { + std::cerr << "speakerLoopSDL: SDL_GetAudioStreamDevice failed: " + << SDL_GetError() << "\n"; + break; + } + + if (!SDL_ResumeAudioDevice(dev)) { + std::cerr << "speakerLoopSDL: SDL_ResumeAudioDevice failed: " + << SDL_GetError() << "\n"; + break; + } + } + + // Push PCM to SDL. We assume frames are already S16, interleaved, matching + // sample_rate / channels we used above. + const int numBytes = static_cast(data.size() * sizeof(std::int16_t)); + + if (!SDL_PutAudioStreamData(localStream, data.data(), numBytes)) { + std::cerr << "speakerLoopSDL: SDL_PutAudioStreamData failed: " + << SDL_GetError() << "\n"; + break; + } + + // Tiny sleep to avoid busy loop; SDL buffers internally. + SDL_Delay(2); + } + + if (localStream) { + SDL_DestroyAudioStream(localStream); + localStream = nullptr; + sdl_audio_stream_ = nullptr; + } + + speaker_running_.store(false, std::memory_order_relaxed); +} + +void SDLMediaManager::stopSpeaker() { + speaker_running_.store(false, std::memory_order_relaxed); + if (speaker_thread_.joinable()) { + speaker_thread_.join(); + } + if (sdl_audio_stream_) { + SDL_DestroyAudioStream(sdl_audio_stream_); + sdl_audio_stream_ = nullptr; + } + speaker_stream_.reset(); +} + +// ---------- Renderer control (placeholder) ---------- + +bool SDLMediaManager::initRenderer( + const std::shared_ptr &video_stream) { + if (!video_stream) { + std::cerr << "startRenderer: videoStream is null\n"; + return false; + } + // Ensure SDL video subsystem is initialized + if (!ensureSDLInit(SDL_INIT_VIDEO)) { + std::cerr << "startRenderer: SDL_INIT_VIDEO failed\n"; + return false; + } + renderer_stream_ = video_stream; + renderer_running_.store(true, std::memory_order_relaxed); + + // Lazily create the SDLVideoRenderer + if (!sdl_renderer_) { + sdl_renderer_ = std::make_unique(); + // You can tune these dimensions or even make them options + if (!sdl_renderer_->init("LiveKit Remote Video", 1280, 720)) { + std::cerr << "startRenderer: SDLVideoRenderer::init failed\n"; + sdl_renderer_.reset(); + renderer_stream_.reset(); + renderer_running_.store(false, std::memory_order_relaxed); + return false; + } + } + + // Start the SDL renderer's own render thread + sdl_renderer_->setStream(renderer_stream_); + + return true; +} + +void SDLMediaManager::shutdownRenderer() { + renderer_running_.store(false, std::memory_order_relaxed); + + // Shut down SDL renderer thread if it exists + if (sdl_renderer_) { + sdl_renderer_->shutdown(); + } + + // Old renderer_thread_ is no longer used, but if you still have it: + if (renderer_thread_.joinable()) { + renderer_thread_.join(); + } + + renderer_stream_.reset(); +} + +void SDLMediaManager::render() { + if (renderer_running_.load(std::memory_order_relaxed) && sdl_renderer_) { + sdl_renderer_->render(); + } +} \ No newline at end of file diff --git a/examples/simple_room/sdl_media_manager.h b/examples/simple_room/sdl_media_manager.h new file mode 100644 index 0000000..cd9ba46 --- /dev/null +++ 
b/examples/simple_room/sdl_media_manager.h @@ -0,0 +1,109 @@ +/* + * Copyright 2025 LiveKit, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include + +#include +#include +#include + +#include "wav_audio_source.h" + +namespace livekit { +class AudioSource; +class VideoSource; +class AudioStream; +class VideoStream; +} // namespace livekit + +// Forward-declared SDL helpers (you can also keep these separate if you like) +class SDLMicSource; +class SDLCamSource; +class SDLVideoRenderer; + +// SDLMediaManager gives you dedicated control over: +// - mic capture -> AudioSource +// - camera capture -> VideoSource +// - speaker playback -> AudioStream (TODO: integrate your API) +// - renderer -> VideoStream (TODO: integrate your API) +class SDLMediaManager { +public: + SDLMediaManager(); + ~SDLMediaManager(); + + // Mic (local capture -> AudioSource) + bool startMic(const std::shared_ptr &audio_source); + void stopMic(); + + // Camera (local capture -> VideoSource) + bool startCamera(const std::shared_ptr &video_source); + void stopCamera(); + + // Speaker (remote audio playback) + bool startSpeaker(const std::shared_ptr &audio_stream); + void stopSpeaker(); + + // Renderer (remote video rendering) + // Following APIs must be called on main thread + bool initRenderer(const std::shared_ptr &video_stream); + void shutdownRenderer(); + void render(); + +private: + // ---- SDL bootstrap helpers ---- + bool ensureSDLInit(Uint32 flags); + + // ---- Mic helpers ---- + void micLoopSDL(); + void micLoopNoise(); + + // ---- Camera helpers ---- + void cameraLoopSDL(); + void cameraLoopFake(); + + // ---- Speaker helpers (TODO: wire AudioStream -> SDL audio) ---- + void speakerLoopSDL(); + + // Mic + std::shared_ptr mic_source_; + std::unique_ptr mic_sdl_; + std::thread mic_thread_; + std::atomic mic_running_{false}; + bool mic_using_sdl_ = false; + + // Camera + std::shared_ptr cam_source_; + std::unique_ptr can_sdl_; + std::thread cam_thread_; + std::atomic cam_running_{false}; + bool cam_using_sdl_ = false; + + // Speaker (remote audio) – left mostly as a placeholder + std::shared_ptr speaker_stream_; + std::thread speaker_thread_; + std::atomic speaker_running_{false}; + SDL_AudioStream *sdl_audio_stream_ = nullptr; + + // Renderer (remote video) – left mostly as a placeholder + std::unique_ptr sdl_renderer_; + std::shared_ptr renderer_stream_; + std::thread renderer_thread_; + std::atomic renderer_running_{false}; +}; diff --git a/examples/simple_room/sdl_video_renderer.cpp b/examples/simple_room/sdl_video_renderer.cpp new file mode 100644 index 0000000..55aa799 --- /dev/null +++ b/examples/simple_room/sdl_video_renderer.cpp @@ -0,0 +1,165 @@ +/* + * Copyright 2025 LiveKit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an “AS IS” BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "sdl_video_renderer.h" + +#include "livekit/livekit.h" +#include +#include + +using namespace livekit; + +SDLVideoRenderer::SDLVideoRenderer() = default; + +SDLVideoRenderer::~SDLVideoRenderer() { shutdown(); } + +bool SDLVideoRenderer::init(const char *title, int width, int height) { + width_ = width; + height_ = height; + + // Assume SDL_Init(SDL_INIT_VIDEO) already called in main() + window_ = SDL_CreateWindow(title, width_, height_, 0); + if (!window_) { + std::cerr << "SDL_CreateWindow failed: " << SDL_GetError() << "\n"; + return false; + } + + renderer_ = SDL_CreateRenderer(window_, nullptr); + if (!renderer_) { + std::cerr << "SDL_CreateRenderer failed: " << SDL_GetError() << "\n"; + return false; + } + + // Note, web will send out BGRA as default, and we can't use ARGB since ffi + // does not support converting from BGRA to ARGB. + texture_ = SDL_CreateTexture(renderer_, SDL_PIXELFORMAT_RGBA8888, + SDL_TEXTUREACCESS_STREAMING, width_, height_); + if (!texture_) { + std::cerr << "SDL_CreateTexture failed: " << SDL_GetError() << "\n"; + return false; + } + + return true; +} + +void SDLVideoRenderer::shutdown() { + if (texture_) { + SDL_DestroyTexture(texture_); + texture_ = nullptr; + } + if (renderer_) { + SDL_DestroyRenderer(renderer_); + renderer_ = nullptr; + } + if (window_) { + SDL_DestroyWindow(window_); + window_ = nullptr; + } + + stream_.reset(); +} + +void SDLVideoRenderer::setStream(std::shared_ptr stream) { + stream_ = std::move(stream); +} + +void SDLVideoRenderer::render() { + // 0) Basic sanity + if (!window_ || !renderer_) { + return; + } + + // 1) Pump SDL events on the main thread + SDL_Event e; + while (SDL_PollEvent(&e)) { + if (e.type == SDL_EVENT_QUIT) { + // TODO: set some global or member flag if you want to quit the app + } + } + + // 2) If no stream, nothing to render + if (!stream_) { + return; + } + + // 3) Read a frame from VideoStream (blocking until one is available) + livekit::VideoFrameEvent vfe; + bool gotFrame = stream_->read(vfe); + if (!gotFrame) { + // EOS / closed – nothing more to render + return; + } + + livekit::LKVideoFrame &frame = vfe.frame; + + // 4) Ensure the frame is RGBA. + // Ideally you requested RGBA from VideoStream::Options so this is a no-op. + if (frame.type() != livekit::VideoBufferType::RGBA) { + try { + frame = frame.convert(livekit::VideoBufferType::RGBA, false); + } catch (const std::exception &ex) { + std::cerr << "SDLVideoRenderer: convert to RGBA failed: " << ex.what() + << "\n"; + return; + } + } + + // Handle size change: recreate texture if needed + if (frame.width() != width_ || frame.height() != height_) { + width_ = frame.width(); + height_ = frame.height(); + + if (texture_) { + SDL_DestroyTexture(texture_); + texture_ = nullptr; + } + texture_ = SDL_CreateTexture( + renderer_, + SDL_PIXELFORMAT_RGBA32, // Note, SDL_PIXELFORMAT_RGBA8888 is not + // compatible with Livekit RGBA format. 
+ SDL_TEXTUREACCESS_STREAMING, width_, height_); + if (!texture_) { + std::cerr << "SDLVideoRenderer: SDL_CreateTexture failed: " + << SDL_GetError() << "\n"; + return; + } + } + + // 6) Upload RGBA data to SDL texture + void *pixels = nullptr; + int pitch = 0; + if (!SDL_LockTexture(texture_, nullptr, &pixels, &pitch)) { + std::cerr << "SDLVideoRenderer: SDL_LockTexture failed: " << SDL_GetError() + << "\n"; + return; + } + + const std::uint8_t *src = frame.data(); + const int srcPitch = frame.width() * 4; // RGBA: 4 bytes per pixel + + for (int y = 0; y < frame.height(); ++y) { + std::memcpy(static_cast(pixels) + y * pitch, + src + y * srcPitch, srcPitch); + } + + SDL_UnlockTexture(texture_); + + // 7) Present + SDL_SetRenderDrawColor(renderer_, 0, 0, 0, 255); + SDL_RenderClear(renderer_); + SDL_RenderTexture(renderer_, texture_, nullptr, nullptr); + SDL_RenderPresent(renderer_); +} diff --git a/examples/simple_room/sdl_video_renderer.h b/examples/simple_room/sdl_video_renderer.h new file mode 100644 index 0000000..6e666ea --- /dev/null +++ b/examples/simple_room/sdl_video_renderer.h @@ -0,0 +1,52 @@ +/* + * Copyright 2025 LiveKit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an “AS IS” BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include + +namespace livekit { +class VideoStream; +} + +class SDLVideoRenderer { +public: + SDLVideoRenderer(); + ~SDLVideoRenderer(); + + // Must be called on main thread, after SDL_Init(SDL_INIT_VIDEO). + bool init(const char *title, int width, int height); + + // Set/replace the stream to render. Safe to call from main thread. + void setStream(std::shared_ptr stream); + + // Called on main thread each tick to pump events and draw latest frame. + void render(); + + void shutdown(); // destroy window/renderer/texture + +private: + SDL_Window *window_ = nullptr; + SDL_Renderer *renderer_ = nullptr; + SDL_Texture *texture_ = nullptr; + + std::shared_ptr stream_; + int width_ = 0; + int height_ = 0; +}; diff --git a/include/livekit/audio_frame.h b/include/livekit/audio_frame.h index ae43351..529d658 100644 --- a/include/livekit/audio_frame.h +++ b/include/livekit/audio_frame.h @@ -42,6 +42,7 @@ class AudioFrame { */ AudioFrame(std::vector data, int sample_rate, int num_channels, int samples_per_channel); + AudioFrame(); // Default constructor /** * Create a new zero-initialized AudioFrame instance. diff --git a/include/livekit/audio_stream.h b/include/livekit/audio_stream.h index 84aac8a..a1b043c 100644 --- a/include/livekit/audio_stream.h +++ b/include/livekit/audio_stream.h @@ -39,24 +39,48 @@ struct AudioFrameEvent { AudioFrame frame; }; +// Represents a pull-based stream of decoded PCM audio frames coming from +// a remote (or local) LiveKit track. Similar to VideoStream, but for audio. 
+// +// Typical usage: +// +// AudioStream::Options opts; +// auto stream = AudioStream::fromTrack(remoteAudioTrack, opts); +// +// AudioFrameEvent ev; +// while (stream->read(ev)) { +// // ev.frame contains interleaved int16 PCM samples +// } +// +// stream->close(); // optional, called automatically in destructor class AudioStream { public: + // Configuration options for AudioStream creation. struct Options { - std::size_t capacity{0}; // 0 = unbounded - int sample_rate{48000}; - int num_channels{1}; - std::string noise_cancellation_module; // empty = disabled - std::string noise_cancellation_options_json; // empty = no options + // Maximum number of AudioFrameEvent items buffered in the internal queue. + // 0 means "unbounded" (the queue can grow without limit). + // + // Using a small non-zero capacity gives ring-buffer semantics: + // if the queue is full, the oldest frame is dropped when a new one arrives. + std::size_t capacity{0}; + + // Optional: name of a noise cancellation module to enable for this stream. + // Empty string means "no noise cancellation". + std::string noise_cancellation_module; + + // Optional: JSON-encoded configuration for the noise cancellation module. + // Empty string means "use module defaults". + std::string noise_cancellation_options_json; }; // Factory: create an AudioStream bound to a specific Track - static std::unique_ptr - from_track(const std::shared_ptr &track, const Options &options); + static std::shared_ptr + fromTrack(const std::shared_ptr &track, const Options &options); // Factory: create an AudioStream from a Participant + TrackSource - static std::unique_ptr from_participant(Participant &participant, - TrackSource track_source, - const Options &options); + static std::shared_ptr fromParticipant(Participant &participant, + TrackSource track_source, + const Options &options); ~AudioStream(); @@ -65,28 +89,35 @@ class AudioStream { AudioStream(AudioStream &&) noexcept; AudioStream &operator=(AudioStream &&) noexcept; - /// Blocking read: returns true if a frame was delivered, - /// false if the stream has ended (EOS or closed). + /// Blocking read: waits until there is an AudioFrameEvent available in the + /// internal queue, or the stream reaches EOS / is closed. + /// + /// \param out_event On success, filled with the next audio frame. + /// \return true if a frame was delivered; false if the stream ended + /// (end-of-stream or close()) and no more data is available. bool read(AudioFrameEvent &out_event); - /// Signal that we are no longer interested in frames. - /// Disposes the underlying FFI stream and removes the listener. + /// Signal that we are no longer interested in audio frames. + /// + /// This disposes the underlying FFI audio stream, unregisters the listener + /// from FfiClient, marks the stream as closed, and wakes any blocking read(). + /// After calling close(), further calls to read() will return false. 
void close(); private: AudioStream() = default; - void init_from_track(const std::shared_ptr &track, - const Options &options); - void init_from_participant(Participant &participant, TrackSource track_source, - const Options &options); + void initFromTrack(const std::shared_ptr &track, + const Options &options); + void initFromParticipant(Participant &participant, TrackSource track_source, + const Options &options); // FFI event handler (registered with FfiClient) - void on_ffi_event(const proto::FfiEvent &event); + void onFfiEvent(const proto::FfiEvent &event); // Queue helpers - void push_frame(AudioFrameEvent &&ev); - void push_eos(); + void pushFrame(AudioFrameEvent &&ev); + void pushEos(); mutable std::mutex mutex_; std::condition_variable cv_; diff --git a/include/livekit/ffi_client.h b/include/livekit/ffi_client.h index 602f9c8..51cd226 100644 --- a/include/livekit/ffi_client.h +++ b/include/livekit/ffi_client.h @@ -38,6 +38,7 @@ class OwnedTrackPublication; class TranscriptionSegment; } // namespace proto +struct RoomOptions; struct TrackPublishOptions; using FfiCallbackFn = void (*)(const uint8_t *, size_t); @@ -76,7 +77,8 @@ class FfiClient { // Room APIs std::future connectAsync(const std::string &url, - const std::string &token); + const std::string &token, + const RoomOptions &options); // Track APIs std::future> getTrackStatsAsync(uintptr_t track_handle); diff --git a/include/livekit/livekit.h b/include/livekit/livekit.h index 4b5fc30..3eab63d 100644 --- a/include/livekit/livekit.h +++ b/include/livekit/livekit.h @@ -16,14 +16,17 @@ #include "audio_frame.h" #include "audio_source.h" +#include "audio_stream.h" #include "local_audio_track.h" #include "local_participant.h" #include "local_track_publication.h" #include "local_video_track.h" #include "participant.h" #include "remote_participant.h" +#include "remote_track_publication.h" #include "room.h" #include "room_delegate.h" #include "track_publication.h" #include "video_frame.h" -#include "video_source.h" \ No newline at end of file +#include "video_source.h" +#include "video_stream.h" \ No newline at end of file diff --git a/include/livekit/local_participant.h b/include/livekit/local_participant.h index 65dcb16..1077d73 100644 --- a/include/livekit/local_participant.h +++ b/include/livekit/local_participant.h @@ -38,8 +38,7 @@ struct Transcription; /** * Represents the local participant in a room. * - * C++ analogue of the Python LocalParticipant, built on top of the C++ - * Participant base class. + * LocalParticipant, built on top of the participant.h base class. */ class LocalParticipant : public Participant { public: diff --git a/include/livekit/participant.h b/include/livekit/participant.h index a0003fe..0cc7418 100644 --- a/include/livekit/participant.h +++ b/include/livekit/participant.h @@ -31,7 +31,6 @@ enum class ParticipantKind { Standard = 0, Ingress, Egress, Sip, Agent }; class Participant { public: - // TODO, consider holding a weak ptr of FfiHandle if it is useful. 
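The AudioStream API above follows a simple pull model. A minimal consumer sketch; drainAudioStream is an illustrative name, and frames are assumed to carry interleaved S16 PCM as the speaker loop expects:

#include <iostream>
#include <memory>

#include "livekit/livekit.h"

void drainAudioStream(const std::shared_ptr<livekit::AudioStream> &stream) {
  livekit::AudioFrameEvent ev;
  // read() blocks until a frame arrives and returns false on EOS or close().
  while (stream->read(ev)) {
    const livekit::AudioFrame &frame = ev.frame;
    std::cout << frame.sample_rate() << " Hz, " << frame.num_channels()
              << " ch, " << frame.data().size() << " samples (all channels)\n";
  }
}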
Participant(FfiHandle handle, std::string sid, std::string name, std::string identity, std::string metadata, std::unordered_map attributes, diff --git a/include/livekit/remote_audio_track.h b/include/livekit/remote_audio_track.h index 97ca2df..99e4c8e 100644 --- a/include/livekit/remote_audio_track.h +++ b/include/livekit/remote_audio_track.h @@ -33,11 +33,7 @@ class AudioSource; // ============================================================ class RemoteAudioTrack : public Track { public: - explicit RemoteAudioTrack(FfiHandle handle, const proto::OwnedTrack &track); - - static std::shared_ptr - createRemoteAudioTrack(const std::string &name, - const std::shared_ptr &source); + explicit RemoteAudioTrack(const proto::OwnedTrack &track); std::string to_string() const; diff --git a/include/livekit/remote_video_track.h b/include/livekit/remote_video_track.h index cbed139..c6abfce 100644 --- a/include/livekit/remote_video_track.h +++ b/include/livekit/remote_video_track.h @@ -33,11 +33,7 @@ class VideoSource; // ============================================================ class RemoteVideoTrack : public Track { public: - explicit RemoteVideoTrack(FfiHandle handle, const proto::OwnedTrack &track); - - static std::shared_ptr - createRemoteVideoTrack(const std::string &name, - const std::shared_ptr &source); + explicit RemoteVideoTrack(const proto::OwnedTrack &track); std::string to_string() const; diff --git a/include/livekit/room.h b/include/livekit/room.h index 98077ed..f2e21d4 100644 --- a/include/livekit/room.h +++ b/include/livekit/room.h @@ -34,16 +34,146 @@ class FfiEvent; class LocalParticipant; class RemoteParticipant; +// Represents end-to-end encryption (E2EE) settings. +// Mirrors python sdk: `E2EEOptions` +struct E2EEOptions { + // Encryption algorithm type. + int encryption_type = 0; + + // Shared static key. If provided, this key is used for encryption. + std::string shared_key; + + // Salt used when deriving ratcheted encryption keys. + std::string ratchet_salt; + + // How many consecutive ratcheting failures are tolerated before an error. + int failure_tolerance = 0; + + // Maximum size of the ratchet window. + int ratchet_window_size = 0; +}; + +// Represents a single ICE server configuration. +// Mirrors python: RtcConfiguration.ice_servers[*] +struct IceServer { + // TURN/STUN server URL (e.g. "stun:stun.l.google.com:19302"). + std::string url; + + // Optional username for TURN authentication. + std::string username; + + // Optional credential (password) for TURN authentication. + std::string credential; +}; + +// WebRTC configuration (ICE, transport, etc.). +// Mirrors python: `RtcConfiguration` +struct RtcConfig { + // ICE transport type (e.g., ALL, RELAY). Maps to proto::IceTransportType. + int ice_transport_type = 0; + + // Continuous or single ICE gathering. Maps to + // proto::ContinualGatheringPolicy. + int continual_gathering_policy = 0; + + // List of STUN/TURN servers for ICE candidate generation. + std::vector ice_servers; +}; + +// Top-level room connection options. +// Mirrors python: `RoomOptions` +struct RoomOptions { + // If true (default), automatically subscribe to all remote tracks. + // This is CRITICAL. Without auto_subscribe, you will never receive: + // - `track_subscribed` events + // - remote audio/video frames + bool auto_subscribe = true; + + // Enable dynacast (server sends optimal layers depending on subscribers). + bool dynacast = false; + + // Optional end-to-end encryption settings. 
+ std::optional e2ee; + + // Optional WebRTC configuration (ICE policy, servers, etc.) + std::optional rtc_config; +}; + +// Represents a LiveKit room session. +// A Room manages: +// - the connection to the LiveKit server +// - participant list (local + remote) +// - track publications +// - server events forwarded to a RoomDelegate class Room { public: Room(); ~Room(); + + // Assign a RoomDelegate that receives room lifecycle callbacks. + // + // The delegate must remain valid for the lifetime of the Room or until a + // different delegate is assigned. The Room does not take ownership. + // Typical usage: + // class MyDelegate : public RoomDelegate { ... }; + // MyDelegate del; + // Room room; + // room.setDelegate(&del); void setDelegate(RoomDelegate *delegate); - bool Connect(const std::string &url, const std::string &token); + + // Connect to a LiveKit room using the given URL and token, applying the + // supplied connection options. + // + // Parameters: + // url — WebSocket URL of the LiveKit server. + // token — Access token for authentication. + // options — Connection options controlling auto-subscribe, + // dynacast, E2EE, and WebRTC configuration. + // Behavior: + // - Registers an FFI event listener *before* sending the connect request. + // - Sends a proto::FfiRequest::Connect with the URL, token, + // and the provided RoomOptions. + // - Blocks until the FFI connect response arrives. + // - Initializes local participant and remote participants. + // - Emits room/participant/track events to the delegate. + // IMPORTANT: + // RoomOptions defaults auto_subscribe = true. + // Without auto_subscribe enabled, remote tracks will NOT be subscribed + // automatically, and no remote audio/video will ever arrive. + bool Connect(const std::string &url, const std::string &token, + const RoomOptions &options); // Accessors + + // Retrieve static metadata about the room. + // This contains fields such as: + // - SID + // - room name + // - metadata + // - participant counts + // - creation timestamp RoomInfoData room_info() const; + + // Get the local participant. + // + // This object represents the current user, including: + // - published tracks (audio/video/screen) + // - identity, SID, metadata + // - publishing/unpublishing operations + // Return value: + // Non-null pointer after successful Connect(). LocalParticipant *local_participant() const; + + // Look up a remote participant by identity. + // + // Parameters: + // identity — The participant’s identity string (not SID) + // Return value: + // Pointer to RemoteParticipant if present, otherwise nullptr. 
+ // RemoteParticipant contains: + // - identity/name/metadata + // - track publications + // - callbacks for track subscribed/unsubscribed, muted/unmuted RemoteParticipant *remote_participant(const std::string &identity) const; private: @@ -53,7 +183,7 @@ class Room { RoomInfoData room_info_; std::shared_ptr room_handle_; std::unique_ptr local_participant_; - std::unordered_map> + std::unordered_map> remote_participants_; void OnEvent(const proto::FfiEvent &event); diff --git a/include/livekit/room_delegate.h b/include/livekit/room_delegate.h index 72411fc..9073654 100644 --- a/include/livekit/room_delegate.h +++ b/include/livekit/room_delegate.h @@ -27,6 +27,9 @@ namespace livekit { class Room; enum class VideoCodec; enum class TrackSource; +class Track; +class RemoteTrackPublication; +class RemoteParticipant; enum class ConnectionQuality { Poor, @@ -237,11 +240,9 @@ struct TrackUnpublishedEvent { }; struct TrackSubscribedEvent { - std::string participant_identity; - std::string track_sid; - std::string track_name; - std::string track_kind; // or enum - std::string track_source; // or enum + std::shared_ptr track; + std::shared_ptr publication; + RemoteParticipant *participant = nullptr; }; struct TrackUnsubscribedEvent { diff --git a/include/livekit/stats.h b/include/livekit/stats.h index c9aa1f6..9a0c51c 100644 --- a/include/livekit/stats.h +++ b/include/livekit/stats.h @@ -49,10 +49,6 @@ class CertificateStats; class StreamStats; } // namespace proto -// ---------------------- -// SDK enums (decoupled from proto enums) -// ---------------------- - enum class DataChannelState { Connecting, Open, diff --git a/include/livekit/track.h b/include/livekit/track.h index 2c5f9a9..2487d39 100644 --- a/include/livekit/track.h +++ b/include/livekit/track.h @@ -25,9 +25,10 @@ #include #include +#include + namespace livekit { -// ----- Enums from track.proto ----- enum class TrackKind { KIND_UNKNOWN = 0, KIND_AUDIO = 1, diff --git a/include/livekit/track_publication.h b/include/livekit/track_publication.h index 8503188..19b8055 100644 --- a/include/livekit/track_publication.h +++ b/include/livekit/track_publication.h @@ -38,7 +38,7 @@ class LocalTrack; class RemoteTrack; /** - * C++ analogue of Python TrackPublication. + * C++ TrackPublication. * * Wraps the immutable publication info plus an FFI handle, and * holds a weak reference to the associated Track (if any). @@ -72,8 +72,9 @@ class TrackPublication { uintptr_t ffiHandleId() const noexcept { return handle_.get(); } /// Associated Track (if attached). 
- std::shared_ptr track() const noexcept { return track_.lock(); } + std::shared_ptr track() const noexcept { return track_; } void setTrack(const std::shared_ptr &track) noexcept { + std::cout << "track_ is null " << (track_.get() == nullptr) << std::endl; track_ = track; } @@ -86,7 +87,7 @@ class TrackPublication { std::vector audio_features); FfiHandle handle_; - std::weak_ptr track_; + std::shared_ptr track_; std::string sid_; std::string name_; diff --git a/include/livekit/video_frame.h b/include/livekit/video_frame.h index a43dc21..878aaca 100644 --- a/include/livekit/video_frame.h +++ b/include/livekit/video_frame.h @@ -25,9 +25,9 @@ namespace livekit { // Mirror of WebRTC video buffer type enum class VideoBufferType { - ARGB, + RGBA = 0, ABGR, - RGBA, + ARGB, BGRA, RGB24, I420, @@ -58,7 +58,7 @@ class OwnedVideoBuffer; */ class LKVideoFrame { public: - LKVideoFrame() = delete; + LKVideoFrame(); LKVideoFrame(int width, int height, VideoBufferType type, std::vector data); diff --git a/include/livekit/video_stream.h b/include/livekit/video_stream.h index 322ab1d..fea3a93 100644 --- a/include/livekit/video_stream.h +++ b/include/livekit/video_stream.h @@ -31,7 +31,7 @@ namespace livekit { -// C++ equivalent of Python VideoFrameEvent +// A single video frame event delivered by VideoStream::read(). struct VideoFrameEvent { LKVideoFrame frame; std::int64_t timestamp_us; @@ -42,19 +42,42 @@ namespace proto { class FfiEvent; } +// Represents a pull-based stream of decoded video frames coming from +// a remote (or local) LiveKit track. Similar to AudioStream, but for video. +// +// Typical usage: +// +// VideoStream::Options opts; +// auto stream = VideoStream::fromTrack(remoteVideoTrack, opts); +// +// VideoFrameEvent ev; +// while (stream->read(ev)) { +// // ev.frame holds a decoded frame in the requested format (RGBA by default) +// } +// +// stream->close(); // optional, called automatically in destructor +// class VideoStream { public: struct Options { - std::size_t capacity{0}; // 0 = unbounded - VideoBufferType format; + // Maximum number of VideoFrameEvent items buffered in the internal queue. + // 0 means "unbounded" (the queue can grow without limit). + // + // With a non-zero capacity, the queue behaves like a ring-buffer: if it + // is full, the oldest frame is dropped when a new one arrives. + std::size_t capacity{0}; + + // Preferred pixel format for frames delivered by read(). The FFI layer + // converts into this format if supported (e.g., RGBA, BGRA, I420, ...). + VideoBufferType format{VideoBufferType::RGBA}; }; // Factory: create a VideoStream bound to a specific Track - static std::unique_ptr + static std::shared_ptr fromTrack(const std::shared_ptr &track, const Options &options); // Factory: create a VideoStream from a Participant + TrackSource - static std::unique_ptr fromParticipant(Participant &participant, + static std::shared_ptr fromParticipant(Participant &participant, TrackSource track_source, const Options &options); @@ -65,12 +88,19 @@ class VideoStream { VideoStream(VideoStream &&) noexcept; VideoStream &operator=(VideoStream &&) noexcept; - /// Blocking read: returns true if a frame was delivered, - /// false if the stream has ended (EOS or closed). + /// Blocking read: waits until a VideoFrameEvent is available in the internal + /// queue, or the stream reaches EOS / is closed. + /// + /// \param out On success, filled with the next video frame event.
+ /// \return true if a frame was delivered; false if the stream ended + /// (end-of-stream or close()) and no more data is available. bool read(VideoFrameEvent &out); - /// Signal that we are no longer interested in frames. - /// Disposes the underlying FFI stream and drains internal listener. + /// Signal that we are no longer interested in video frames. + /// + /// This disposes the underlying FFI video stream, unregisters the listener + /// from FfiClient, marks the stream as closed, and wakes any blocking read(). + /// After calling close(), further calls to read() will return false. void close(); private: diff --git a/src/audio_frame.cpp b/src/audio_frame.cpp index 0df9f76..23ff01e 100644 --- a/src/audio_frame.cpp +++ b/src/audio_frame.cpp @@ -28,6 +28,9 @@ namespace livekit { +AudioFrame::AudioFrame() + : sample_rate_(0), num_channels_(0), samples_per_channel_(0) {} + AudioFrame::AudioFrame(std::vector data, int sample_rate, int num_channels, int samples_per_channel) : data_(std::move(data)), sample_rate_(sample_rate), diff --git a/src/audio_stream.cpp b/src/audio_stream.cpp index 975908f..908c6b3 100644 --- a/src/audio_stream.cpp +++ b/src/audio_stream.cpp @@ -32,20 +32,19 @@ using proto::FfiRequest; // Factory helpers // ------------------------ -std::unique_ptr -AudioStream::from_track(const std::shared_ptr &track, - const Options &options) { - auto stream = std::unique_ptr(new AudioStream()); - stream->init_from_track(track, options); +std::shared_ptr +AudioStream::fromTrack(const std::shared_ptr &track, + const Options &options) { + auto stream = std::shared_ptr(new AudioStream()); + stream->initFromTrack(track, options); return stream; } -std::unique_ptr -AudioStream::from_participant(Participant &participant, - TrackSource track_source, - const Options &options) { - auto stream = std::unique_ptr(new AudioStream()); - stream->init_from_participant(participant, track_source, options); +std::shared_ptr +AudioStream::fromParticipant(Participant &participant, TrackSource track_source, + const Options &options) { + auto stream = std::shared_ptr(new AudioStream()); + stream->initFromParticipant(participant, track_source, options); return stream; } @@ -135,22 +134,24 @@ void AudioStream::close() { // Internal functions -void AudioStream::init_from_track(const std::shared_ptr &track, - const Options &options) { +void AudioStream::initFromTrack(const std::shared_ptr &track, + const Options &options) { capacity_ = options.capacity; options_ = options; // 1) Subscribe to FFI events listener_id_ = FfiClient::instance().AddListener( - [this](const FfiEvent &e) { this->on_ffi_event(e); }); + [this](const FfiEvent &e) { this->onFfiEvent(e); }); // 2) Send FfiRequest to create a new audio stream bound to this track FfiRequest req; auto *new_audio_stream = req.mutable_new_audio_stream(); new_audio_stream->set_track_handle( static_cast(track->ffi_handle_id())); - new_audio_stream->set_sample_rate(options_.sample_rate); - new_audio_stream->set_num_channels(options.num_channels); + // TODO, sample_rate and num_channels are not useful in AudioStream, remove it + // from FFI. 
+ // new_audio_stream->set_sample_rate(options_.sample_rate); + // new_audio_stream->set_num_channels(options.num_channels); new_audio_stream->set_type(proto::AudioStreamType::AUDIO_STREAM_NATIVE); if (!options_.noise_cancellation_module.empty()) { @@ -167,22 +168,24 @@ void AudioStream::init_from_track(const std::shared_ptr &track, stream_handle_ = FfiHandle(static_cast(stream.handle().id())); } -void AudioStream::init_from_participant(Participant &participant, - TrackSource track_source, - const Options &options) { +void AudioStream::initFromParticipant(Participant &participant, + TrackSource track_source, + const Options &options) { capacity_ = options.capacity; options_ = options; // 1) Subscribe to FFI events listener_id_ = FfiClient::instance().AddListener( - [this](const FfiEvent &e) { this->on_ffi_event(e); }); + [this](const FfiEvent &e) { this->onFfiEvent(e); }); // 2) Send FfiRequest to create audio stream from participant + track source FfiRequest req; auto *as = req.mutable_audio_stream_from_participant(); as->set_participant_handle(participant.ffiHandleId()); - as->set_sample_rate(options_.sample_rate); - as->set_num_channels(options_.num_channels); + // TODO, sample_rate and num_channels are not useful in AudioStream, remove it + // from FFI. + // as->set_sample_rate(options_.sample_rate); + // as->set_num_channels(options_.num_channels); as->set_type(proto::AudioStreamType::AUDIO_STREAM_NATIVE); as->set_track_source(static_cast(track_source)); @@ -198,11 +201,10 @@ void AudioStream::init_from_participant(Participant &participant, stream_handle_ = FfiHandle(static_cast(stream.handle().id())); } -void AudioStream::on_ffi_event(const FfiEvent &event) { +void AudioStream::onFfiEvent(const FfiEvent &event) { if (event.message_case() != FfiEvent::kAudioStreamEvent) { return; } - const auto &ase = event.audio_stream_event(); // Check if this event is for our stream handle. if (ase.stream_handle() != static_cast(stream_handle_.get())) { @@ -216,13 +218,13 @@ void AudioStream::on_ffi_event(const FfiEvent &event) { // AudioFrame._from_owned_info. AudioFrame frame = AudioFrame::fromOwnedInfo(fr.frame()); AudioFrameEvent ev{std::move(frame)}; - push_frame(std::move(ev)); + pushFrame(std::move(ev)); } else if (ase.has_eos()) { - push_eos(); + pushEos(); } } -void AudioStream::push_frame(AudioFrameEvent &&ev) { +void AudioStream::pushFrame(AudioFrameEvent &&ev) { { std::lock_guard lock(mutex_); @@ -240,7 +242,7 @@ void AudioStream::push_frame(AudioFrameEvent &&ev) { cv_.notify_one(); } -void AudioStream::push_eos() { +void AudioStream::pushEos() { { std::lock_guard lock(mutex_); if (eof_) { diff --git a/src/ffi_client.cpp b/src/ffi_client.cpp index 0fe4d3b..5b426d0 100644 --- a/src/ffi_client.cpp +++ b/src/ffi_client.cpp @@ -17,11 +17,14 @@ #include #include "build.h" +#include "e2ee.pb.h" #include "ffi.pb.h" #include "livekit/ffi_client.h" #include "livekit/ffi_handle.h" +#include "livekit/room.h" // TODO, maybe avoid circular deps by moving RoomOptions to a room_types.h ? 
#include "livekit/track.h" #include "livekit_ffi.h" +#include "room.pb.h" #include "room_proto_converter.h" namespace livekit { @@ -133,14 +136,64 @@ std::future FfiClient::registerAsync( // Room APIs Implementation std::future -FfiClient::connectAsync(const std::string &url, const std::string &token) { +FfiClient::connectAsync(const std::string &url, const std::string &token, + const RoomOptions &options) { proto::FfiRequest req; auto *connect = req.mutable_connect(); connect->set_url(url); connect->set_token(token); - connect->mutable_options()->set_auto_subscribe(true); + auto *opts = connect->mutable_options(); + opts->set_auto_subscribe(options.auto_subscribe); + opts->set_dynacast(options.dynacast); + std::cout << "connectAsync " << std::endl; + // --- E2EE / encryption (optional) --- + if (options.e2ee.has_value()) { + std::cout << "connectAsync e2ee " << std::endl; + const E2EEOptions &eo = *options.e2ee; + + // Use the non-deprecated encryption field + auto *enc = opts->mutable_encryption(); + + enc->set_encryption_type( + static_cast(eo.encryption_type)); + + auto *kp = enc->mutable_key_provider_options(); + kp->set_shared_key(eo.shared_key); + kp->set_ratchet_salt(eo.ratchet_salt); + kp->set_failure_tolerance(eo.failure_tolerance); + kp->set_ratchet_window_size(eo.ratchet_window_size); + } + + // --- RTC configuration (optional) --- + if (options.rtc_config.has_value()) { + std::cout << "options.rtc_config.has_value() " << std::endl; + const RtcConfig &rc = *options.rtc_config; + auto *rtc = opts->mutable_rtc_config(); + + rtc->set_ice_transport_type( + static_cast(rc.ice_transport_type)); + rtc->set_continual_gathering_policy( + static_cast( + rc.continual_gathering_policy)); + for (const IceServer &ice : rc.ice_servers) { + auto *s = rtc->add_ice_servers(); + + // proto: repeated string urls = 1 + if (!ice.url.empty()) { + s->add_urls(ice.url); + } + if (!ice.username.empty()) { + s->set_username(ice.username); + } + if (!ice.credential.empty()) { + // proto: password = 3 + s->set_password(ice.credential); + } + } + } + std::cout << "connectAsync sendRequest " << std::endl; proto::FfiResponse resp = sendRequest(req); if (!resp.has_connect()) { throw std::runtime_error("FfiResponse missing connect"); diff --git a/src/remote_audio_track.cpp b/src/remote_audio_track.cpp index 4384920..f8e4b81 100644 --- a/src/remote_audio_track.cpp +++ b/src/remote_audio_track.cpp @@ -24,26 +24,13 @@ namespace livekit { -RemoteAudioTrack::RemoteAudioTrack(FfiHandle handle, - const proto::OwnedTrack &track) - : Track(std::move(handle), track.info().sid(), track.info().name(), +RemoteAudioTrack::RemoteAudioTrack(const proto::OwnedTrack &track) + : Track(FfiHandle{static_cast(track.handle().id())}, + track.info().sid(), track.info().name(), fromProto(track.info().kind()), fromProto(track.info().stream_state()), track.info().muted(), true) {} -std::shared_ptr RemoteAudioTrack::createRemoteAudioTrack( - const std::string &name, const std::shared_ptr &source) { - proto::FfiRequest req; - auto *msg = req.mutable_create_audio_track(); - msg->set_name(name); - msg->set_source_handle(static_cast(source->ffi_handle_id())); - - proto::FfiResponse resp = FfiClient::instance().sendRequest(req); - const proto::OwnedTrack &owned = resp.create_audio_track().track(); - FfiHandle handle(static_cast(owned.handle().id())); - return std::make_shared(std::move(handle), owned); -} - std::string RemoteAudioTrack::to_string() const { return "rtc.RemoteAudioTrack(sid=" + sid() + ", name=" + name() + ")"; } diff --git 
a/src/remote_video_track.cpp b/src/remote_video_track.cpp index 02b5d77..3fd9ee0 100644 --- a/src/remote_video_track.cpp +++ b/src/remote_video_track.cpp @@ -24,26 +24,13 @@ namespace livekit { -RemoteVideoTrack::RemoteVideoTrack(FfiHandle handle, - const proto::OwnedTrack &track) - : Track(std::move(handle), track.info().sid(), track.info().name(), +RemoteVideoTrack::RemoteVideoTrack(const proto::OwnedTrack &track) + : Track(FfiHandle{static_cast(track.handle().id())}, + track.info().sid(), track.info().name(), fromProto(track.info().kind()), fromProto(track.info().stream_state()), track.info().muted(), true) {} -std::shared_ptr RemoteVideoTrack::createRemoteVideoTrack( - const std::string &name, const std::shared_ptr &source) { - proto::FfiRequest req; - auto *msg = req.mutable_create_video_track(); - msg->set_name(name); - msg->set_source_handle(static_cast(source->ffi_handle_id())); - - proto::FfiResponse resp = FfiClient::instance().sendRequest(req); - const proto::OwnedTrack &owned = resp.create_video_track().track(); - FfiHandle handle(static_cast(owned.handle().id())); - return std::make_shared(std::move(handle), owned); -} - std::string RemoteVideoTrack::to_string() const { return "rtc.RemoteVideoTrack(sid=" + sid() + ", name=" + name() + ")"; } diff --git a/src/room.cpp b/src/room.cpp index 9f56cdb..f52edb1 100644 --- a/src/room.cpp +++ b/src/room.cpp @@ -16,16 +16,21 @@ #include "livekit/room.h" +#include "livekit/audio_stream.h" #include "livekit/ffi_client.h" #include "livekit/local_participant.h" #include "livekit/local_track_publication.h" +#include "livekit/remote_audio_track.h" #include "livekit/remote_participant.h" #include "livekit/remote_track_publication.h" +#include "livekit/remote_video_track.h" #include "livekit/room_delegate.h" +#include "livekit/video_stream.h" #include "ffi.pb.h" #include "room.pb.h" #include "room_proto_converter.h" +#include "track.pb.h" #include "track_proto_converter.h" #include #include @@ -37,7 +42,6 @@ using proto::ConnectRequest; using proto::FfiEvent; using proto::FfiRequest; using proto::FfiResponse; -using proto::RoomOptions; namespace { @@ -67,7 +71,8 @@ void Room::setDelegate(RoomDelegate *delegate) { delegate_ = delegate; } -bool Room::Connect(const std::string &url, const std::string &token) { +bool Room::Connect(const std::string &url, const std::string &token, + const RoomOptions &options) { auto listenerId = FfiClient::instance().AddListener( std::bind(&Room::OnEvent, this, std::placeholders::_1)); { @@ -77,7 +82,7 @@ bool Room::Connect(const std::string &url, const std::string &token) { throw std::runtime_error("already connected"); } } - auto fut = FfiClient::instance().connectAsync(url, token); + auto fut = FfiClient::instance().connectAsync(url, token, options); try { auto connectCb = fut.get(); // fut will throw if it fails to connect to the room @@ -261,8 +266,61 @@ void Room::OnEvent(const FfiEvent &event) { break; } case proto::RoomEvent::kTrackSubscribed: { - auto ev = fromProto(re.track_subscribed()); + const auto &ts = re.track_subscribed(); + const std::string &identity = ts.participant_identity(); + const auto &owned_track = ts.track(); + const auto &track_info = owned_track.info(); + std::shared_ptr rpublication; + RemoteParticipant *rparticipant = nullptr; + std::shared_ptr remote_track; + { + std::lock_guard guard(lock_); + // Find participant + auto pit = remote_participants_.find(identity); + if (pit == remote_participants_.end()) { + std::cerr << "track_subscribed for unknown participant: " << identity + << 
"\n"; + break; + } + rparticipant = pit->second.get(); + // Find existing publication by track SID (from track_published) + auto &pubs = rparticipant->mutable_track_publications(); + auto pubIt = pubs.find(track_info.sid()); + if (pubIt == pubs.end()) { + std::cerr << "track_subscribed for unknown publication sid " + << track_info.sid() << " (participant " << identity + << ")\n"; + break; + } + rpublication = pubIt->second; + + // Create RemoteVideoTrack / RemoteAudioTrack + if (track_info.kind() == proto::TrackKind::KIND_VIDEO) { + remote_track = std::make_shared(owned_track); + } else if (track_info.kind() == proto::TrackKind::KIND_AUDIO) { + remote_track = std::make_shared(owned_track); + } else { + std::cerr << "track_subscribed with unsupported kind: " + << track_info.kind() << "\n"; + break; + } + std::cout << "before setTrack " << std::endl; + + // Attach to publication, mark subscribed + rpublication->setTrack(remote_track); + std::cout << "setTrack " << std::endl; + rpublication->setSubscribed(true); + std::cout << "setSubscribed " << std::endl; + } + + // Emit remote track_subscribed-style callback + TrackSubscribedEvent ev; + ev.track = remote_track; + ev.publication = rpublication; + ev.participant = rparticipant; + std::cout << "onTrackSubscribed " << std::endl; delegate_snapshot->onTrackSubscribed(*this, ev); + std::cout << "after onTrackSubscribed " << std::endl; break; } case proto::RoomEvent::kTrackUnsubscribed: { diff --git a/src/room_event_converter.cpp b/src/room_event_converter.cpp index 651896f..bb1846c 100644 --- a/src/room_event_converter.cpp +++ b/src/room_event_converter.cpp @@ -260,18 +260,6 @@ LocalTrackSubscribedEvent fromProto(const proto::LocalTrackSubscribed &src) { return ev; } -TrackPublishedEvent fromProto(const proto::TrackPublished &src) { - TrackPublishedEvent ev; - ev.participant_identity = src.participant_identity(); - // OwnedTrackPublication publication = 2; - // TODO: map publication info once you inspect OwnedTrackPublication - // ev.publication_sid = src.publication().info().sid(); - // ev.track_name = src.publication().info().name(); - // ev.track_kind = ...; - // ev.track_source = ...; - return ev; -} - TrackUnpublishedEvent fromProto(const proto::TrackUnpublished &src) { TrackUnpublishedEvent ev; ev.participant_identity = src.participant_identity(); @@ -279,18 +267,6 @@ TrackUnpublishedEvent fromProto(const proto::TrackUnpublished &src) { return ev; } -TrackSubscribedEvent fromProto(const proto::TrackSubscribed &src) { - TrackSubscribedEvent ev; - ev.participant_identity = src.participant_identity(); - // OwnedTrack track = 2; - // TODO: map track info once you inspect OwnedTrack - // ev.track_sid = src.track().info().sid(); - // ev.track_name = src.track().info().name(); - // ev.track_kind = ...; - // ev.track_source = ...; - return ev; -} - TrackUnsubscribedEvent fromProto(const proto::TrackUnsubscribed &src) { TrackUnsubscribedEvent ev; ev.participant_identity = src.participant_identity(); diff --git a/src/room_event_converter.h b/src/room_event_converter.h index 96d9c39..f8dde1f 100644 --- a/src/room_event_converter.h +++ b/src/room_event_converter.h @@ -52,7 +52,6 @@ LocalTrackSubscribedEvent fromProto(const proto::LocalTrackSubscribed &src); TrackPublishedEvent fromProto(const proto::TrackPublished &src); TrackUnpublishedEvent fromProto(const proto::TrackUnpublished &src); -TrackSubscribedEvent fromProto(const proto::TrackSubscribed &src); TrackUnsubscribedEvent fromProto(const proto::TrackUnsubscribed &src); 
TrackSubscriptionFailedEvent fromProto(const proto::TrackSubscriptionFailed &src); diff --git a/src/room_proto_converter.cpp b/src/room_proto_converter.cpp index 5423f07..0c41a6a 100644 --- a/src/room_proto_converter.cpp +++ b/src/room_proto_converter.cpp @@ -269,18 +269,6 @@ TrackUnpublishedEvent fromProto(const proto::TrackUnpublished &in) { return ev; } -TrackSubscribedEvent fromProto(const proto::TrackSubscribed &in) { - TrackSubscribedEvent ev; - ev.participant_identity = in.participant_identity(); - // OwnedTrack track = 2; - // TODO: map track info once you inspect OwnedTrack - // ev.track_sid = in.track().info().sid(); - // ev.track_name = in.track().info().name(); - // ev.track_kind = ...; - // ev.track_source = ...; - return ev; -} - TrackUnsubscribedEvent fromProto(const proto::TrackUnsubscribed &in) { TrackUnsubscribedEvent ev; ev.participant_identity = in.participant_identity(); diff --git a/src/room_proto_converter.h b/src/room_proto_converter.h index eafba5a..89ba7c4 100644 --- a/src/room_proto_converter.h +++ b/src/room_proto_converter.h @@ -51,7 +51,6 @@ LocalTrackSubscribedEvent fromProto(const proto::LocalTrackSubscribed &in); TrackPublishedEvent fromProto(const proto::TrackPublished &in); TrackUnpublishedEvent fromProto(const proto::TrackUnpublished &in); -TrackSubscribedEvent fromProto(const proto::TrackSubscribed &in); TrackUnsubscribedEvent fromProto(const proto::TrackUnsubscribed &in); TrackSubscriptionFailedEvent fromProto(const proto::TrackSubscriptionFailed &in); diff --git a/src/video_frame.cpp b/src/video_frame.cpp index a56e7f2..c06bff4 100644 --- a/src/video_frame.cpp +++ b/src/video_frame.cpp @@ -264,6 +264,9 @@ computePlaneInfos(uintptr_t base, int width, int height, VideoBufferType type) { // LKVideoFrame implementation // ---------------------------------------------------------------------------- +LKVideoFrame::LKVideoFrame() + : width_{0}, height_{0}, type_{VideoBufferType::BGRA}, data_{} {} + LKVideoFrame::LKVideoFrame(int width, int height, VideoBufferType type, std::vector data) : width_(width), height_(height), type_(type), data_(std::move(data)) { @@ -310,8 +313,7 @@ LKVideoFrame LKVideoFrame::fromOwnedInfo(const proto::OwnedVideoBuffer &owned) { const auto &info = owned.info(); const int width = static_cast(info.width()); const int height = static_cast(info.height()); - // Assuming your C++ enum matches proto's underlying values. 
- const VideoBufferType type = static_cast(info.type()); + const VideoBufferType type = fromProto(info.type()); std::vector buffer; diff --git a/src/video_stream.cpp b/src/video_stream.cpp index 19b1903..7847920 100644 --- a/src/video_stream.cpp +++ b/src/video_stream.cpp @@ -14,30 +14,22 @@ using proto::FfiEvent; using proto::FfiRequest; using proto::VideoStreamEvent; -// ------------------------ -// Factory helpers -// ------------------------ - -std::unique_ptr +std::shared_ptr VideoStream::fromTrack(const std::shared_ptr &track, const Options &options) { - auto stream = std::unique_ptr(new VideoStream()); + auto stream = std::shared_ptr(new VideoStream()); stream->initFromTrack(track, options); return stream; } -std::unique_ptr +std::shared_ptr VideoStream::fromParticipant(Participant &participant, TrackSource track_source, const Options &options) { - auto stream = std::unique_ptr(new VideoStream()); + auto stream = std::shared_ptr(new VideoStream()); stream->initFromParticipant(participant, track_source, options); return stream; } -// ------------------------ -// Destructor / move -// ------------------------ - VideoStream::~VideoStream() { close(); } VideoStream::VideoStream(VideoStream &&other) noexcept { @@ -77,31 +69,76 @@ VideoStream &VideoStream::operator=(VideoStream &&other) noexcept { return *this; } -// ------------------------ -// Init internals -// ------------------------ +// --------------------- Public API --------------------- + +bool VideoStream::read(VideoFrameEvent &out) { + std::unique_lock lock(mutex_); + + cv_.wait(lock, [this] { return !queue_.empty() || eof_ || closed_; }); + + if (closed_ || (queue_.empty() && eof_)) { + return false; // EOS / closed + } + + out = std::move(queue_.front()); + queue_.pop_front(); + return true; +} + +void VideoStream::close() { + std::cout << "VideoStream::close() \n"; + { + std::lock_guard lock(mutex_); + if (closed_) { + return; + } + closed_ = true; + } + + // Dispose FFI handle + if (stream_handle_.get() != 0) { + stream_handle_.reset(); + } + + // Remove listener + if (listener_id_ != 0) { + FfiClient::instance().RemoveListener(listener_id_); + listener_id_ = 0; + } + + // Wake any waiting readers + cv_.notify_all(); +} + +// --------------------- Internal helpers --------------------- void VideoStream::initFromTrack(const std::shared_ptr &track, const Options &options) { capacity_ = options.capacity; - // 1) Subscribe to FFI events + // Subscribe to FFI events; this is essential for receiving video frames from FFI.
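+  // Note: the event listener is registered before the new_video_stream
+  // request below is sent, so frame/EOS events that arrive immediately after
+  // stream creation are not missed.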
listener_id_ = FfiClient::instance().AddListener( [this](const proto::FfiEvent &e) { this->onFfiEvent(e); }); - // 2) Send FFI request to create a new video stream bound to this track + // Send FFI request to create a new video stream bound to this track FfiRequest req; auto *new_video_stream = req.mutable_new_video_stream(); - new_video_stream->set_track_handle(track->ffi_handle_id()); + new_video_stream->set_track_handle( + static_cast(track->ffi_handle_id())); new_video_stream->set_type(proto::VideoStreamType::VIDEO_STREAM_NATIVE); new_video_stream->set_normalize_stride(true); new_video_stream->set_format(toProto(options.format)); auto resp = FfiClient::instance().sendRequest(req); + if (!resp.has_new_video_stream()) { + std::cerr << "VideoStream::initFromTrack: FFI response missing " + "new_video_stream()\n"; + throw std::runtime_error("new_video_stream FFI request failed"); + } // Adjust field names to match your proto exactly: const auto &stream = resp.new_video_stream().stream(); stream_handle_ = FfiHandle(static_cast(stream.handle().id())); - // stream.info() is available if you want to cache metadata. + // TODO, do we need to cache the metadata from stream.info ? } void VideoStream::initFromParticipant(Participant &participant, @@ -129,52 +166,6 @@ void VideoStream::initFromParticipant(Participant &participant, stream_handle_ = FfiHandle(static_cast(stream.handle().id())); } -// ------------------------ -// Public API -// ------------------------ - -bool VideoStream::read(VideoFrameEvent &out) { - std::unique_lock lock(mutex_); - - cv_.wait(lock, [this] { return !queue_.empty() || eof_ || closed_; }); - - if (closed_ || (queue_.empty() && eof_)) { - return false; // EOS / closed - } - - out = std::move(queue_.front()); - queue_.pop_front(); - return true; -} - -void VideoStream::close() { - { - std::lock_guard lock(mutex_); - if (closed_) { - return; - } - closed_ = true; - } - - // Dispose FFI handle - if (stream_handle_.get() != 0) { - stream_handle_.reset(); - } - - // Remove listener - if (listener_id_ != 0) { - FfiClient::instance().RemoveListener(listener_id_); - listener_id_ = 0; - } - - // Wake any waiting readers - cv_.notify_all(); -} - -// ------------------------ -// Internal helpers -// ------------------------ - void VideoStream::onFfiEvent(const proto::FfiEvent &event) { // Filter for video_stream_event first. 
if (event.message_case() != FfiEvent::kVideoStreamEvent) { @@ -195,7 +186,6 @@ void VideoStream::onFfiEvent(const proto::FfiEvent &event) { VideoFrameEvent ev{std::move(frame), fr.timestamp_us(), static_cast(fr.rotation())}; - pushFrame(std::move(ev)); } else if (vse.has_eos()) { pushEos(); @@ -221,14 +211,17 @@ void VideoStream::pushFrame(VideoFrameEvent &&ev) { } void VideoStream::pushEos() { + std::cout << "pushEos 1" << std::endl; { std::lock_guard lock(mutex_); if (eof_) { + std::cout << "pushEos 2" << std::endl; return; } eof_ = true; } cv_.notify_all(); + std::cout << "pushEos 3" << std::endl; } } // namespace livekit From cf34bcfe66efc0166f456e98788c37c580163c36 Mon Sep 17 00:00:00 2001 From: shijing xian Date: Tue, 2 Dec 2025 09:58:52 -0800 Subject: [PATCH 4/5] fix the comments and the build --- include/livekit/audio_stream.h | 57 +++++++++++++++------------- include/livekit/local_audio_track.h | 43 +++++++++++++++++---- include/livekit/local_video_track.h | 43 +++++++++++++++++---- include/livekit/remote_audio_track.h | 22 ++++++++--- include/livekit/remote_video_track.h | 20 ++++++++-- include/livekit/room_delegate.h | 1 + src/local_audio_track.cpp | 3 +- src/local_video_track.cpp | 3 +- 8 files changed, 141 insertions(+), 51 deletions(-) diff --git a/include/livekit/audio_stream.h b/include/livekit/audio_stream.h index 3d405b5..3616e8d 100644 --- a/include/livekit/audio_stream.h +++ b/include/livekit/audio_stream.h @@ -39,52 +39,55 @@ struct AudioFrameEvent { AudioFrame frame; }; -// Represents a pull-based stream of decoded PCM audio frames coming from -// a remote (or local) LiveKit track. Similar to VideoStream, but for audio. -// -// Typical usage: -// -// AudioStream::Options opts; -// auto stream = AudioStream::fromTrack(remoteAudioTrack, opts); -// -// AudioFrameEvent ev; -// while (stream->read(ev)) { -// // ev.frame contains interleaved int16 PCM samples -// } -// -// stream->close(); // optional, called automatically in destructor +/** + * Represents a pull-based stream of decoded PCM audio frames coming from + * a remote (or local) LiveKit track. Similar to VideoStream, but for audio. + * + * Typical usage: + * + * AudioStream::Options opts; + * auto stream = AudioStream::fromTrack(remoteAudioTrack, opts); + * + * AudioFrameEvent ev; + * while (stream->read(ev)) { + * // ev.frame contains interleaved int16 PCM samples + * } + * + * stream->close(); // optional, called automatically in destructor + */ class AudioStream { public: - // Configuration options for AudioStream creation. + /// Configuration options for AudioStream creation. struct Options { - // Maximum number of AudioFrameEvent items buffered in the internal queue. - // 0 means "unbounded" (the queue can grow without limit). - // - // Using a small non-zero capacity gives ring-buffer semantics: - // if the queue is full, the oldest frame is dropped when a new one arrives. + /// Maximum number of AudioFrameEvent items buffered in the internal queue. + /// 0 means "unbounded" (the queue can grow without limit). + /// + /// Using a small non-zero capacity gives ring-buffer semantics: + /// if the queue is full, the oldest frame is dropped when a new one + /// arrives. std::size_t capacity{0}; - // Optional: name of a noise cancellation module to enable for this stream. - // Empty string means "no noise cancellation". + /// Optional: name of a noise cancellation module to enable for this stream. + /// Empty string means "no noise cancellation". 
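+    /// Example (sketch; the module name and JSON payload are hypothetical and
+    /// depend on which noise-cancellation plugin, if any, is built in):
+    ///
+    ///   AudioStream::Options opts;
+    ///   opts.noise_cancellation_module = "my_nc_module";
+    ///   opts.noise_cancellation_options_json = "{\"strength\": 0.8}";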
std::string noise_cancellation_module; - // Optional: JSON-encoded configuration for the noise cancellation module. - // Empty string means "use module defaults". + /// Optional: JSON-encoded configuration for the noise cancellation module. + /// Empty string means "use module defaults". std::string noise_cancellation_options_json; }; - // Factory: create an AudioStream bound to a specific Track + /// Factory: create an AudioStream bound to a specific Track static std::shared_ptr fromTrack(const std::shared_ptr &track, const Options &options); - // Factory: create an AudioStream from a Participant + TrackSource + /// Factory: create an AudioStream from a Participant + TrackSource static std::shared_ptr fromParticipant(Participant &participant, TrackSource track_source, const Options &options); ~AudioStream(); - // No copy, assignment constructors. + /// No copy, assignment constructors. AudioStream(const AudioStream &) = delete; AudioStream &operator=(const AudioStream &) = delete; AudioStream(AudioStream &&) noexcept; diff --git a/include/livekit/local_audio_track.h b/include/livekit/local_audio_track.h index 66cc52b..1d99c0a 100644 --- a/include/livekit/local_audio_track.h +++ b/include/livekit/local_audio_track.h @@ -28,25 +28,54 @@ class OwnedTrack; class AudioSource; -// ============================================================ -// LocalAudioTrack -// ============================================================ +/** + * Represents a user-provided audio track sourced from the local device. + * + * `LocalAudioTrack` is used to publish microphone audio (or any custom + * audio source) to a LiveKit room. It wraps a platform-specific audio + * source and exposes simple controls such as `mute()` and `unmute()`. + * + * Typical usage: + * + * auto source = AudioSource::create(...); + * auto track = LocalAudioTrack::createLocalAudioTrack("mic", source); + * room->localParticipant()->publishTrack(track); + * + * Muting a local audio track stops transmitting audio to the room, but + * the underlying source may continue capturing depending on platform + * behavior. + * + * The track name provided during creation is visible to remote + * participants and can be used for debugging or UI display. + */ class LocalAudioTrack : public Track { public: - explicit LocalAudioTrack(FfiHandle handle, const proto::OwnedTrack &track); - + /// Creates a new local audio track backed by the given `AudioSource`. + /// + /// @param name Human-readable name for the track. This may appear to + /// remote participants and in analytics/debug logs. + /// @param source The audio source that produces PCM frames for this track. + /// + /// @return A shared pointer to the newly constructed `LocalAudioTrack`. static std::shared_ptr createLocalAudioTrack(const std::string &name, const std::shared_ptr &source); - // Mute/unmute + /// Mutes the audio track. + /// + /// A muted track stops sending audio to the room, but the track remains + /// published and can be unmuted later without renegotiation. void mute(); + + /// Unmutes the audio track and resumes sending audio to the room. void unmute(); + /// Returns a human-readable string representation of the track, + /// including its SID and name. Useful for debugging and logging. 
std::string to_string() const; private: - // Optional: you may add private helpers if needed + explicit LocalAudioTrack(FfiHandle handle, const proto::OwnedTrack &track); }; } // namespace livekit \ No newline at end of file diff --git a/include/livekit/local_video_track.h b/include/livekit/local_video_track.h index 63b710a..2e0e22f 100644 --- a/include/livekit/local_video_track.h +++ b/include/livekit/local_video_track.h @@ -28,25 +28,54 @@ class OwnedTrack; class VideoSource; -// ============================================================ -// LocalAudioTrack -// ============================================================ +/** + * Represents a user-provided video track sourced from the local device. + * + * `LocalVideoTrack` is used to publish camera video (or any custom + * video source) to a LiveKit room. It wraps a platform-specific video + * source and exposes simple controls such as `mute()` and `unmute()`. + * + * Typical usage: + * + * auto source = VideoSource::create(...); + * auto track = LocalVideoTrack::createLocalVideoTrack("cam", source); + * room->localParticipant()->publishTrack(track); + * + * Muting a local video track stops transmitting video to the room, but + * the underlying source may continue capturing depending on platform + * behavior. + * + * The track name provided during creation is visible to remote + * participants and can be used for debugging or UI display. + */ class LocalVideoTrack : public Track { public: - explicit LocalVideoTrack(FfiHandle handle, const proto::OwnedTrack &track); - + /// Creates a new local video track backed by the given `VideoSource`. + /// + /// @param name Human-readable name for the track. This may appear to + /// remote participants and in analytics/debug logs. + /// @param source The video source that produces video frames for this track. + /// + /// @return A shared pointer to the newly constructed `LocalVideoTrack`. static std::shared_ptr createLocalVideoTrack(const std::string &name, const std::shared_ptr &source); - // Mute/unmute + /// Mutes the video track. + /// + /// A muted track stops sending video to the room, but the track remains + /// published and can be unmuted later without renegotiation. void mute(); + + /// Unmutes the video track and resumes sending video to the room. void unmute(); + /// Returns a human-readable string representation of the track, + /// including its SID and name. Useful for debugging and logging. std::string to_string() const; private: - // Optional: you may add private helpers if needed + explicit LocalVideoTrack(FfiHandle handle, const proto::OwnedTrack &track); }; } // namespace livekit \ No newline at end of file diff --git a/include/livekit/remote_audio_track.h b/include/livekit/remote_audio_track.h index 99e4c8e..572e62c 100644 --- a/include/livekit/remote_audio_track.h +++ b/include/livekit/remote_audio_track.h @@ -28,16 +28,28 @@ class OwnedTrack; class AudioSource; -// ============================================================ -// RemoteAudioTrack -// ============================================================ +/** + * Represents an audio track published by a remote participant and + * subscribed to by the local participant. + * + * `RemoteAudioTrack` instances are created internally when the SDK receives a + * `kTrackSubscribed` event. Each instance is owned by its associated + * `RemoteParticipant` and delivered to the application via + * `TrackSubscribedEvent`. 
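+ *
+ * Typical usage (sketch; `MyDelegate` and the exact callback signature are
+ * illustrative):
+ *
+ *   void MyDelegate::onTrackSubscribed(Room &room,
+ *                                      const TrackSubscribedEvent &ev) {
+ *     // For audio publications, pull decoded PCM via AudioStream:
+ *     AudioStream::Options opts;
+ *     auto stream = AudioStream::fromTrack(ev.track, opts);
+ *     // read AudioFrameEvents from `stream` on a worker thread
+ *   }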
+ * + * Applications generally interact with `RemoteAudioTrack` through events and + * `RemoteTrackPublication`, not through direct construction. + */ class RemoteAudioTrack : public Track { public: + /// Constructs a `RemoteAudioTrack` from an internal protocol-level + /// `OwnedTrack` description provided by the signaling/FFI layer. + /// This constructor is intended for internal SDK use only. explicit RemoteAudioTrack(const proto::OwnedTrack &track); + /// Returns a concise, human-readable string summarizing the track, + /// including its SID and name. Useful for debugging and logging. std::string to_string() const; - -private: }; } // namespace livekit \ No newline at end of file diff --git a/include/livekit/remote_video_track.h b/include/livekit/remote_video_track.h index c6abfce..ff65c4c 100644 --- a/include/livekit/remote_video_track.h +++ b/include/livekit/remote_video_track.h @@ -28,13 +28,27 @@ class OwnedTrack; class VideoSource; -// ============================================================ -// RemoteVideoTrack -// ============================================================ +/** + * Represents a video track published by a remote participant and + * subscribed to by the local participant. + * + * `RemoteVideoTrack` instances are created internally when the SDK receives a + * `kTrackSubscribed` event. Each instance is owned by its associated + * `RemoteParticipant` and delivered to the application via + * `TrackSubscribedEvent`. + * + * Applications generally interact with `RemoteVideoTrack` through events and + * `RemoteTrackPublication`, not through direct construction. + */ class RemoteVideoTrack : public Track { public: + /// Constructs a `RemoteVideoTrack` from an internal protocol-level + /// `OwnedTrack` description provided by the signaling/FFI layer. + /// This constructor is intended for internal SDK use only. explicit RemoteVideoTrack(const proto::OwnedTrack &track); + /// Returns a concise, human-readable string summarizing the track, + /// including its SID and name. Useful for debugging and logging.
std::string to_string() const; private: diff --git a/include/livekit/room_delegate.h b/include/livekit/room_delegate.h index 9073654..0e73fae 100644 --- a/include/livekit/room_delegate.h +++ b/include/livekit/room_delegate.h @@ -18,6 +18,7 @@ #include #include +#include #include #include #include diff --git a/src/local_audio_track.cpp b/src/local_audio_track.cpp index 663d0e3..02d73b0 100644 --- a/src/local_audio_track.cpp +++ b/src/local_audio_track.cpp @@ -41,7 +41,8 @@ std::shared_ptr LocalAudioTrack::createLocalAudioTrack( proto::FfiResponse resp = FfiClient::instance().sendRequest(req); const proto::OwnedTrack &owned = resp.create_audio_track().track(); FfiHandle handle(static_cast(owned.handle().id())); - return std::make_shared(std::move(handle), owned); + return std::shared_ptr( + new LocalAudioTrack(std::move(handle), owned)); } void LocalAudioTrack::mute() { diff --git a/src/local_video_track.cpp b/src/local_video_track.cpp index 455176a..7294a99 100644 --- a/src/local_video_track.cpp +++ b/src/local_video_track.cpp @@ -41,7 +41,8 @@ std::shared_ptr LocalVideoTrack::createLocalVideoTrack( proto::FfiResponse resp = FfiClient::instance().sendRequest(req); const proto::OwnedTrack &owned = resp.create_video_track().track(); FfiHandle handle(static_cast(owned.handle().id())); - return std::make_shared(std::move(handle), owned); + return std::shared_ptr( + new LocalVideoTrack(std::move(handle), owned)); } void LocalVideoTrack::mute() { From 7141457ad49aaf35f4a99f68adc6853abeb43956 Mon Sep 17 00:00:00 2001 From: shijing xian Date: Tue, 2 Dec 2025 10:15:58 -0800 Subject: [PATCH 5/5] another try to fix the linux build --- examples/CMakeLists.txt | 3 +++ examples/simple_room/fallback_capture.cpp | 3 +++ examples/simple_room/main.cpp | 2 ++ 3 files changed, 8 insertions(+) diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index ef7076e..f242af3 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -1,6 +1,9 @@ cmake_minimum_required(VERSION 3.31.0) project (livekit-examples) +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake") include(sdl3) diff --git a/examples/simple_room/fallback_capture.cpp b/examples/simple_room/fallback_capture.cpp index 158b81c..e0c3c8c 100644 --- a/examples/simple_room/fallback_capture.cpp +++ b/examples/simple_room/fallback_capture.cpp @@ -16,6 +16,9 @@ #include "fallback_capture.h" +#include +#include + #include "livekit/livekit.h" #include "wav_audio_source.h" diff --git a/examples/simple_room/main.cpp b/examples/simple_room/main.cpp index dd23091..e675a2f 100644 --- a/examples/simple_room/main.cpp +++ b/examples/simple_room/main.cpp @@ -18,7 +18,9 @@ #include #include #include +#include #include +#include #include #include #include