From c5434eb44989394255691e6379bcd5957712ecba Mon Sep 17 00:00:00 2001 From: shijing xian Date: Mon, 24 Nov 2025 10:14:07 -0800 Subject: [PATCH] some minor polish --- CMakeLists.txt | 12 + examples/simple_room/main.cpp | 116 ++++++++- include/livekit/livekit.h | 5 +- include/livekit/local_video_track.h | 52 +++++ include/livekit/remote_audio_track.h | 47 ++++ include/livekit/remote_video_track.h | 47 ++++ include/livekit/video_frame.h | 133 +++++++++++ include/livekit/video_source.h | 92 ++++++++ src/local_video_track.cpp | 81 +++++++ src/remote_audio_track.cpp | 51 ++++ src/remote_video_track.cpp | 51 ++++ src/video_frame.cpp | 338 +++++++++++++++++++++++++++ src/video_source.cpp | 68 ++++++ src/video_utils.cpp | 173 ++++++++++++++ src/video_utils.h | 30 +++ 15 files changed, 1283 insertions(+), 13 deletions(-) create mode 100644 include/livekit/local_video_track.h create mode 100644 include/livekit/remote_audio_track.h create mode 100644 include/livekit/remote_video_track.h create mode 100644 include/livekit/video_frame.h create mode 100644 include/livekit/video_source.h create mode 100644 src/local_video_track.cpp create mode 100644 src/remote_audio_track.cpp create mode 100644 src/remote_video_track.cpp create mode 100644 src/video_frame.cpp create mode 100644 src/video_source.cpp create mode 100644 src/video_utils.cpp create mode 100644 src/video_utils.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 07d31d3..07fcb34 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -163,6 +163,7 @@ add_library(livekit include/livekit/ffi_handle.h include/livekit/ffi_client.h include/livekit/local_audio_track.h + include/livekit/remote_audio_track.h include/livekit/participant.h include/livekit/local_participant.h include/livekit/livekit.h @@ -171,11 +172,16 @@ add_library(livekit include/livekit/track_publication.h include/livekit/local_track_publication.h include/livekit/remote_track_publication.h + include/livekit/video_frame.h + include/livekit/video_source.h + 
include/livekit/local_video_track.h + include/livekit/remote_video_track.h src/audio_frame.cpp src/audio_source.cpp src/ffi_handle.cpp src/ffi_client.cpp src/local_audio_track.cpp + src/remote_audio_track.cpp src/room.cpp src/room_proto_converter.cpp src/room_proto_converter.h @@ -187,6 +193,12 @@ add_library(livekit src/track_publication.cpp src/local_track_publication.cpp src/remote_track_publication.cpp + src/video_frame.cpp + src/video_source.cpp + src/local_video_track.cpp + src/remote_video_track.cpp + src/video_utils.cpp + src/video_utils.h ) # Add generated proto objects to the wrapper diff --git a/examples/simple_room/main.cpp b/examples/simple_room/main.cpp index 5d69c70..6f3d611 100644 --- a/examples/simple_room/main.cpp +++ b/examples/simple_room/main.cpp @@ -13,6 +13,9 @@ // TODO(shijing), remove this livekit_ffi.h as it should be internal only. #include "livekit_ffi.h" +// TODO: consider exposing video_utils.h as a public header. +#include "video_utils.h" + using namespace livekit; namespace { @@ -160,6 +163,58 @@ void runNoiseCaptureLoop(const std::shared_ptr &source) { std::cout << "Error in clearQueue" << std::endl; } } + +void runFakeVideoCaptureLoop(const std::shared_ptr &source) { + auto frame = LKVideoFrame::create(1280, 720, VideoBufferType::ARGB); + double frameIntervalSec = 1.0 / 30; + while (g_running.load(std::memory_order_relaxed)) { + static auto start = std::chrono::high_resolution_clock::now(); + float t = std::chrono::duration( + std::chrono::high_resolution_clock::now() - start) + .count(); + // Cycle every 4 seconds: 0=red, 1=green, 2=blue, 3=black + int stage = static_cast(t) % 4; + std::vector rgb(4); + switch (stage) { + case 0: // red + rgb[0] = 255; + rgb[1] = 0; + rgb[2] = 0; + break; + case 1: // green + rgb[0] = 0; + rgb[1] = 255; + rgb[2] = 0; + break; + case 2: // blue + rgb[0] = 0; + rgb[1] = 0; + rgb[2] = 255; + break; + case 3: // black + rgb[0] = 0; + rgb[1] = 0; + rgb[2] = 0; + } + for (size_t i = 0; i < frame.dataSize(); i += 4) { + 
frame.data()[i] = 255; + frame.data()[i + 1] = rgb[0]; + frame.data()[i + 2] = rgb[1]; + frame.data()[i + 3] = rgb[2]; + } + try { + LKVideoFrame i420 = convertViaFfi(frame, VideoBufferType::I420, false); // NOTE(review): i420 is never used; the ARGB frame is what gets captured + source->captureFrame(frame, 0, VideoRotation::VIDEO_ROTATION_0); + } catch (const std::exception &e) { + // If something goes wrong, log and break out + std::cerr << "Error in captureFrame: " << e.what() << std::endl; + break; + } + + std::this_thread::sleep_for(std::chrono::duration(frameIntervalSec)); + } +} + } // namespace int main(int argc, char *argv[]) { @@ -210,28 +265,29 @@ int main(int argc, char *argv[]) { << info.reliable_dc_buffered_amount_low_threshold << "\n" << " Creation time (ms): " << info.creation_time << "\n"; + // Setup Audio Source / Track auto audioSource = std::make_shared(44100, 1, 10); auto audioTrack = LocalAudioTrack::createLocalAudioTrack("micTrack", audioSource); - TrackPublishOptions opts; - opts.source = TrackSource::SOURCE_MICROPHONE; - opts.dtx = false; - opts.simulcast = false; - + TrackPublishOptions audioOpts; + audioOpts.source = TrackSource::SOURCE_MICROPHONE; + audioOpts.dtx = false; + audioOpts.simulcast = false; + std::shared_ptr audioPub; try { // publishTrack takes std::shared_ptr, LocalAudioTrack derives from // Track - auto pub = room.local_participant()->publishTrack(audioTrack, opts); + audioPub = room.local_participant()->publishTrack(audioTrack, audioOpts); std::cout << "Published track:\n" - << " SID: " << pub->sid() << "\n" - << " Name: " << pub->name() << "\n" - << " Kind: " << static_cast(pub->kind()) << "\n" - << " Source: " << static_cast(pub->source()) << "\n" - << " Simulcasted: " << std::boolalpha << pub->simulcasted() + << " SID: " << audioPub->sid() << "\n" + << " Name: " << audioPub->name() << "\n" + << " Kind: " << static_cast(audioPub->kind()) << "\n" + << " Source: " << static_cast(audioPub->source()) << "\n" + << " Simulcasted: " << std::boolalpha << audioPub->simulcasted() << "\n" - << " Muted: " << 
std::boolalpha << pub->muted() << "\n"; + << " Muted: " << std::boolalpha << audioPub->muted() << "\n"; } catch (const std::exception &e) { std::cerr << "Failed to publish track: " << e.what() << std::endl; } @@ -239,6 +295,34 @@ int main(int argc, char *argv[]) { // TODO, if we have pre-buffering feature, we might consider starting the // thread right after creating the source. std::thread audioThread(runNoiseCaptureLoop, audioSource); + + // Setup Video Source / Track + auto videoSource = std::make_shared(1280, 720); + std::shared_ptr videoTrack = + LocalVideoTrack::createLocalVideoTrack("cam", videoSource); + TrackPublishOptions videoOpts; + videoOpts.source = TrackSource::SOURCE_CAMERA; + videoOpts.dtx = false; + videoOpts.simulcast = true; + std::shared_ptr videoPub; + try { + // publishTrack takes std::shared_ptr, LocalVideoTrack derives from + // Track + videoPub = room.local_participant()->publishTrack(videoTrack, videoOpts); + + std::cout << "Published track:\n" + << " SID: " << videoPub->sid() << "\n" + << " Name: " << videoPub->name() << "\n" + << " Kind: " << static_cast(videoPub->kind()) << "\n" + << " Source: " << static_cast(videoPub->source()) << "\n" + << " Simulcasted: " << std::boolalpha << videoPub->simulcasted() + << "\n" + << " Muted: " << std::boolalpha << videoPub->muted() << "\n"; + } catch (const std::exception &e) { + std::cerr << "Failed to publish track: " << e.what() << std::endl; + } + std::thread videoThread(runFakeVideoCaptureLoop, videoSource); + // Keep the app alive until Ctrl-C so we continue receiving events, // similar to asyncio.run(main()) keeping the loop running. 
while (g_running.load()) { @@ -249,6 +333,14 @@ int main(int argc, char *argv[]) { if (audioThread.joinable()) { audioThread.join(); } + // Clean up the audio track publication (audioPub stays null if publishing failed) + if (audioPub) room.local_participant()->unpublishTrack(audioPub->sid()); + + if (videoThread.joinable()) { + videoThread.join(); + } + // Clean up the video track publication (videoPub stays null if publishing failed) + if (videoPub) room.local_participant()->unpublishTrack(videoPub->sid()); FfiClient::instance().shutdown(); std::cout << "Exiting.\n"; diff --git a/include/livekit/livekit.h b/include/livekit/livekit.h index 015521c..23d63c4 100644 --- a/include/livekit/livekit.h +++ b/include/livekit/livekit.h @@ -19,7 +19,10 @@ #include "local_audio_track.h" #include "local_participant.h" #include "local_track_publication.h" +#include "local_video_track.h" #include "participant.h" #include "room.h" #include "room_delegate.h" -#include "track_publication.h" \ No newline at end of file +#include "track_publication.h" +#include "video_frame.h" +#include "video_source.h" \ No newline at end of file diff --git a/include/livekit/local_video_track.h b/include/livekit/local_video_track.h new file mode 100644 index 0000000..63b710a --- /dev/null +++ b/include/livekit/local_video_track.h @@ -0,0 +1,52 @@ +/* + * Copyright 2025 LiveKit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an “AS IS” BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include "track.h" +#include +#include + +namespace livekit { + +namespace proto { +class OwnedTrack; +} + +class VideoSource; + +// ============================================================ +// LocalVideoTrack +// ============================================================ +class LocalVideoTrack : public Track { +public: + explicit LocalVideoTrack(FfiHandle handle, const proto::OwnedTrack &track); + + static std::shared_ptr + createLocalVideoTrack(const std::string &name, + const std::shared_ptr &source); + + // Mute/unmute this track (updates the cached muted flag) + void mute(); + void unmute(); + + std::string to_string() const; + +private: + // Optional: you may add private helpers if needed +}; + +} // namespace livekit \ No newline at end of file diff --git a/include/livekit/remote_audio_track.h b/include/livekit/remote_audio_track.h new file mode 100644 index 0000000..97ca2df --- /dev/null +++ b/include/livekit/remote_audio_track.h @@ -0,0 +1,47 @@ +/* + * Copyright 2025 LiveKit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an “AS IS” BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include "track.h" +#include +#include + +namespace livekit { + +namespace proto { +class OwnedTrack; +} + +class AudioSource; + +// ============================================================ +// RemoteAudioTrack +// ============================================================ +class RemoteAudioTrack : public Track { +public: + explicit RemoteAudioTrack(FfiHandle handle, const proto::OwnedTrack &track); + + static std::shared_ptr + createRemoteAudioTrack(const std::string &name, + const std::shared_ptr &source); + + std::string to_string() const; + +private: +}; + +} // namespace livekit \ No newline at end of file diff --git a/include/livekit/remote_video_track.h b/include/livekit/remote_video_track.h new file mode 100644 index 0000000..cbed139 --- /dev/null +++ b/include/livekit/remote_video_track.h @@ -0,0 +1,47 @@ +/* + * Copyright 2025 LiveKit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an “AS IS” BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include "track.h" +#include +#include + +namespace livekit { + +namespace proto { +class OwnedTrack; +} + +class VideoSource; + +// ============================================================ +// RemoteVideoTrack +// ============================================================ +class RemoteVideoTrack : public Track { +public: + explicit RemoteVideoTrack(FfiHandle handle, const proto::OwnedTrack &track); + + static std::shared_ptr + createRemoteVideoTrack(const std::string &name, + const std::shared_ptr &source); + + std::string to_string() const; + +private: +}; + +} // namespace livekit \ No newline at end of file diff --git a/include/livekit/video_frame.h b/include/livekit/video_frame.h new file mode 100644 index 0000000..3ba4f16 --- /dev/null +++ b/include/livekit/video_frame.h @@ -0,0 +1,133 @@ +/* + * Copyright 2025 LiveKit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an “AS IS” BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include + +namespace livekit { + +// Mirror of WebRTC video buffer type +enum class VideoBufferType { + ARGB, + ABGR, + RGBA, + BGRA, + RGB24, + I420, + I420A, + I422, + I444, + I010, + NV12 +}; + +struct VideoPlaneInfo { + std::uintptr_t data_ptr; // pointer to plane data (for FFI) + std::uint32_t stride; // bytes per row + std::uint32_t size; // plane size in bytes +}; + +/** + * Public SDK representation of a video frame. + * + * - Owns its pixel buffer (std::vector). 
+ * - Developers can allocate and fill frames in C++ and pass them to the SDK. + * - The SDK can expose the backing memory to Rust via data_ptr + layout for + * the duration of a blocking FFI call (similar to AudioFrame). + */ +class LKVideoFrame { +public: + LKVideoFrame() = delete; + LKVideoFrame(int width, int height, VideoBufferType type, + std::vector data); + + LKVideoFrame(const LKVideoFrame &) = delete; + LKVideoFrame &operator=(const LKVideoFrame &) = delete; + LKVideoFrame(LKVideoFrame &&) noexcept = default; + LKVideoFrame &operator=(LKVideoFrame &&) noexcept = default; + + /* LKVideoFrame(LKVideoFrame&& other) noexcept + : width_(other.width_), + height_(other.height_), + type_(other.type_), + data_(std::move(other.data_)) { + other.width_ = 0; + other.height_ = 0; + } + LKVideoFrame& operator=(LKVideoFrame&& other) noexcept;*/ + + /** + * Allocate a new frame with the correct buffer size for the given format. + * Data is zero-initialized. + */ + static LKVideoFrame create(int width, int height, VideoBufferType type); + + // Basic properties + int width() const noexcept { return width_; } + int height() const noexcept { return height_; } + VideoBufferType type() const noexcept { return type_; } + + std::uint8_t *data() noexcept { return data_.data(); } + const std::uint8_t *data() const noexcept { return data_.data(); } + std::size_t dataSize() const noexcept { return data_.size(); } + + /** + * Compute plane layout for this frame (Y/U/V, UV, etc.): each entry holds + * the plane's address inside this frame's buffer, its stride and its size. + * + * Packed formats (ARGB, RGB24, ...) yield one plane; an empty buffer, none. + */ + std::vector planeInfos() const; + + /** + * Convert this frame into another pixel format. + * + * This uses the underlying FFI `video_convert` pipeline to transform the + * current frame into a new `LKVideoFrame` with the requested + * `dst` buffer type (e.g. ARGB → I420, BGRA → RGB24, etc.). 
+ * + * @param dst Desired output format (see VideoBufferType). + * @param flip_y If true, the converted frame will be vertically flipped. + * + * @return A new LKVideoFrame containing the converted image data. + * + * Notes: + * - This function allocates a new buffer and copies pixel data; it does + * not modify the original frame. + * - This function performs a full CPU-based pixel conversion. Depending + * on resolution and format, this may involve substantial computation + * (e.g., color-space transforms, planar repacking, vertical flipping). + * Avoid calling this inside tight real-time loops unless necessary. + * - Throws std::runtime_error if the FFI conversion fails or if the + * format combination is unsupported. + * + * Typical usage: + * LKVideoFrame i420 = frame.convert(VideoBufferType::I420); + */ + LKVideoFrame convert(VideoBufferType dst, bool flip_y = false) const; + +private: + int width_; + int height_; + VideoBufferType type_; + std::vector data_; +}; + +} // namespace livekit diff --git a/include/livekit/video_source.h b/include/livekit/video_source.h new file mode 100644 index 0000000..95b6750 --- /dev/null +++ b/include/livekit/video_source.h @@ -0,0 +1,92 @@ +/* + * Copyright 2025 LiveKit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an “AS IS” BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +#include "livekit/ffi_handle.h" + +namespace livekit { + +class LKVideoFrame; + +/** + * Rotation of a video frame. 
+ * + * Mirrors proto_video.VideoRotation but kept as a public SDK enum. + */ +enum class VideoRotation { + VIDEO_ROTATION_0 = 0, + VIDEO_ROTATION_90 = 90, + VIDEO_ROTATION_180 = 180, + VIDEO_ROTATION_270 = 270, +}; + +/** + * Represents a real-time video source that can accept frames from the + * application and feed them into the LiveKit core. + */ +class VideoSource { +public: + /** + * Create a new native video source with a fixed resolution. + * + * @param width Width in pixels. + * @param height Height in pixels. + * + * Throws std::runtime_error if the FFI call fails or the response + * does not contain the expected new_video_source field. + */ + VideoSource(int width, int height); + + // Owned FFI handle will be released by FfiHandle's destructor. + ~VideoSource() = default; + + VideoSource(const VideoSource &) = delete; + VideoSource &operator=(const VideoSource &) = delete; + VideoSource(VideoSource &&) noexcept = default; + VideoSource &operator=(VideoSource &&) noexcept = default; + + /// Source resolution as declared at construction. + int width() const noexcept { return width_; } + int height() const noexcept { return height_; } + + /// Underlying FFI handle ID (0 if invalid). + std::uint64_t ffi_handle_id() const noexcept { return handle_.get(); } + + /** + * Push a LKVideoFrame into the FFI video source. + * + * @param frame Video frame to send. + * @param timestamp_us Optional timestamp in microseconds (defaults to 0). + * @param rotation Rotation of the frame; defaults to + * VideoRotation::VIDEO_ROTATION_0. + * + * Notes: + * - Fire-and-forget: the frame is handed to the FFI layer. + * - NOTE(review): buffer lifetime requirements are not stated here; confirm. 
+ */ + void captureFrame(const LKVideoFrame &frame, std::int64_t timestamp_us = 0, + VideoRotation rotation = VideoRotation::VIDEO_ROTATION_0); + +private: + FfiHandle handle_; // owned FFI handle + int width_{0}; + int height_{0}; +}; + +} // namespace livekit diff --git a/src/local_video_track.cpp b/src/local_video_track.cpp new file mode 100644 index 0000000..455176a --- /dev/null +++ b/src/local_video_track.cpp @@ -0,0 +1,81 @@ +/* + * Copyright 2025 LiveKit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an “AS IS” BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "livekit/local_video_track.h" + +#include "ffi.pb.h" +#include "livekit/ffi_client.h" +#include "livekit/video_source.h" +#include "track.pb.h" +#include "track_proto_converter.h" + +namespace livekit { + +LocalVideoTrack::LocalVideoTrack(FfiHandle handle, + const proto::OwnedTrack &track) + : Track(std::move(handle), track.info().sid(), track.info().name(), + fromProto(track.info().kind()), + fromProto(track.info().stream_state()), track.info().muted(), + false) {} // NOTE(review): the Remote* tracks pass true here; presumably an is-remote flag, confirm against Track + +std::shared_ptr LocalVideoTrack::createLocalVideoTrack( + const std::string &name, const std::shared_ptr &source) { + proto::FfiRequest req; + auto *msg = req.mutable_create_video_track(); + msg->set_name(name); + msg->set_source_handle(static_cast(source->ffi_handle_id())); // NOTE(review): source is dereferenced without a null check + + proto::FfiResponse resp = FfiClient::instance().sendRequest(req); + const proto::OwnedTrack &owned = resp.create_video_track().track(); // NOTE(review): the response is not checked for actually carrying create_video_track + FfiHandle handle(static_cast(owned.handle().id())); + return std::make_shared(std::move(handle), owned); +} + +void LocalVideoTrack::mute() { + if (!has_handle()) { + setMuted(true); + return; + } + + proto::FfiRequest req; + auto *msg = req.mutable_local_track_mute(); + msg->set_track_handle(static_cast(ffi_handle_id())); + msg->set_mute(true); + + (void)FfiClient::instance().sendRequest(req); // response deliberately discarded + setMuted(true); +} + +void LocalVideoTrack::unmute() { + if (!has_handle()) { + setMuted(false); + return; + } + + proto::FfiRequest req; + auto *msg = req.mutable_local_track_mute(); + msg->set_track_handle(static_cast(ffi_handle_id())); + msg->set_mute(false); + + (void)FfiClient::instance().sendRequest(req); // response deliberately discarded + setMuted(false); +} + +std::string LocalVideoTrack::to_string() const { + return "rtc.LocalVideoTrack(sid=" + sid() + ", name=" + name() + ")"; +} + +} // namespace livekit \ No newline at end of file diff --git a/src/remote_audio_track.cpp b/src/remote_audio_track.cpp new file mode 100644 index 0000000..4384920 --- /dev/null +++ b/src/remote_audio_track.cpp @@ -0,0 +1,51 
@@ +/* + * Copyright 2025 LiveKit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an “AS IS” BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "livekit/remote_audio_track.h" + +#include "ffi.pb.h" +#include "livekit/audio_source.h" +#include "livekit/ffi_client.h" +#include "track.pb.h" +#include "track_proto_converter.h" + +namespace livekit { + +RemoteAudioTrack::RemoteAudioTrack(FfiHandle handle, + const proto::OwnedTrack &track) + : Track(std::move(handle), track.info().sid(), track.info().name(), + fromProto(track.info().kind()), + fromProto(track.info().stream_state()), track.info().muted(), + true) {} + +std::shared_ptr RemoteAudioTrack::createRemoteAudioTrack( + const std::string &name, const std::shared_ptr &source) { + proto::FfiRequest req; + auto *msg = req.mutable_create_audio_track(); + msg->set_name(name); + msg->set_source_handle(static_cast(source->ffi_handle_id())); + + proto::FfiResponse resp = FfiClient::instance().sendRequest(req); + const proto::OwnedTrack &owned = resp.create_audio_track().track(); + FfiHandle handle(static_cast(owned.handle().id())); + return std::make_shared(std::move(handle), owned); +} + +std::string RemoteAudioTrack::to_string() const { + return "rtc.RemoteAudioTrack(sid=" + sid() + ", name=" + name() + ")"; +} + +} // namespace livekit \ No newline at end of file diff --git a/src/remote_video_track.cpp b/src/remote_video_track.cpp new file mode 100644 index 0000000..02b5d77 --- /dev/null +++ b/src/remote_video_track.cpp @@ -0,0 +1,51 @@ +/* + * Copyright 
2025 LiveKit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an “AS IS” BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "livekit/remote_video_track.h" + +#include "ffi.pb.h" +#include "livekit/ffi_client.h" +#include "livekit/video_source.h" +#include "track.pb.h" +#include "track_proto_converter.h" + +namespace livekit { + +RemoteVideoTrack::RemoteVideoTrack(FfiHandle handle, + const proto::OwnedTrack &track) + : Track(std::move(handle), track.info().sid(), track.info().name(), + fromProto(track.info().kind()), + fromProto(track.info().stream_state()), track.info().muted(), + true) {} + +std::shared_ptr RemoteVideoTrack::createRemoteVideoTrack( + const std::string &name, const std::shared_ptr &source) { + proto::FfiRequest req; + auto *msg = req.mutable_create_video_track(); + msg->set_name(name); + msg->set_source_handle(static_cast(source->ffi_handle_id())); + + proto::FfiResponse resp = FfiClient::instance().sendRequest(req); + const proto::OwnedTrack &owned = resp.create_video_track().track(); + FfiHandle handle(static_cast(owned.handle().id())); + return std::make_shared(std::move(handle), owned); +} + +std::string RemoteVideoTrack::to_string() const { + return "rtc.RemoteVideoTrack(sid=" + sid() + ", name=" + name() + ")"; +} + +} // namespace livekit \ No newline at end of file diff --git a/src/video_frame.cpp b/src/video_frame.cpp new file mode 100644 index 0000000..badce04 --- /dev/null +++ b/src/video_frame.cpp @@ -0,0 +1,338 @@ +#include "livekit/video_frame.h" + +#include +#include 
+#include +#include +#include + +#include "video_utils.h" + +namespace livekit { +namespace { + +// Compute total buffer size in bytes for (width, height, type). +std::size_t computeBufferSize(int width, int height, VideoBufferType type) { + if (width <= 0 || height <= 0) { + throw std::invalid_argument( + "LKVideoFrame: width and height must be positive"); + } + + const auto w = static_cast(width); + const auto h = static_cast(height); + switch (type) { + case VideoBufferType::ARGB: + case VideoBufferType::ABGR: + case VideoBufferType::RGBA: + case VideoBufferType::BGRA: + // 4 bytes per pixel + return w * h * 4; + + case VideoBufferType::RGB24: + // 3 bytes per pixel + return w * h * 3; + + case VideoBufferType::I444: + // Y, U, V all full resolution + return w * h * 3; + + case VideoBufferType::I420: + case VideoBufferType::NV12: + case VideoBufferType::I010: { + // Y full, U and V subsampled 2x2 + const std::size_t chroma_w = (w + 1) / 2; + const std::size_t chroma_h = (h + 1) / 2; + if (type == VideoBufferType::I420) { + // Y (1 byte) + U (1 byte) + V (1 byte) + return w * h + chroma_w * chroma_h * 2; + } else if (type == VideoBufferType::NV12) { + // Y (1 byte), UV interleaved (2 bytes per chroma sample) + return w * h + chroma_w * chroma_h * 2; + } else { // I010, 16 bits per sample in memory + // Y: 2 bytes per sample, U & V: 2 bytes per sample + return w * h * 2 + chroma_w * chroma_h * 4; + } + } + + case VideoBufferType::I420A: { + // Y full, U & V 2x2, plus alpha full res + const std::size_t chroma_w = (w + 1) / 2; + const std::size_t chroma_h = (h + 1) / 2; + // Y + A are full resolution, U + V subsampled + return w * h * 2 + chroma_w * chroma_h * 2; + } + + case VideoBufferType::I422: { + // Y full, U & V subsampled horizontally only + const std::size_t chroma_w = (w + 1) / 2; + return w * h + chroma_w * h * 2; + } + + default: + throw std::runtime_error("LKVideoFrame: unsupported VideoBufferType"); + } +} + +// Compute plane layout for (base_ptr, 
width, height, type) +std::vector +computePlaneInfos(uintptr_t base, int width, int height, VideoBufferType type) { + std::vector planes; + if (!base || width <= 0 || height <= 0) { + std::cerr << "[LKVideoFrame] Warning: invalid planeInfos input (ptr=" + << base << ", w=" << width << ", h=" << height << ")\n"; + return planes; + } + const auto w = static_cast(width); + const auto h = static_cast(height); + auto pushPlane = [&](uintptr_t ptr, uint32_t stride, uint32_t size) { + VideoPlaneInfo info; + info.data_ptr = ptr; + info.stride = stride; + info.size = size; + planes.push_back(info); + }; + + switch (type) { + case VideoBufferType::ARGB: + case VideoBufferType::ABGR: + case VideoBufferType::RGBA: + case VideoBufferType::BGRA: { + const uint32_t stride = w * 4; + const uint32_t size = stride * h; + pushPlane(base, stride, size); + break; + } + + case VideoBufferType::RGB24: { + const uint32_t stride = w * 3; + const uint32_t size = stride * h; + pushPlane(base, stride, size); + break; + } + + case VideoBufferType::I420: { + const uint32_t chroma_w = (w + 1) / 2; + const uint32_t chroma_h = (h + 1) / 2; + + // Y + const uint32_t y_stride = w; + const uint32_t y_size = w * h; + uintptr_t y_ptr = base; + pushPlane(y_ptr, y_stride, y_size); + + // U + const uint32_t u_stride = chroma_w; + const uint32_t u_size = chroma_w * chroma_h; + uintptr_t u_ptr = y_ptr + y_size; + pushPlane(u_ptr, u_stride, u_size); + + // V + const uint32_t v_stride = chroma_w; + const uint32_t v_size = chroma_w * chroma_h; + uintptr_t v_ptr = u_ptr + u_size; + pushPlane(v_ptr, v_stride, v_size); + break; + } + + case VideoBufferType::I420A: { + const uint32_t chroma_w = (w + 1) / 2; + const uint32_t chroma_h = (h + 1) / 2; + + // Y + const uint32_t y_stride = w; + const uint32_t y_size = w * h; + uintptr_t y_ptr = base; + pushPlane(y_ptr, y_stride, y_size); + + // U + const uint32_t u_stride = chroma_w; + const uint32_t u_size = chroma_w * chroma_h; + uintptr_t u_ptr = y_ptr + y_size; + 
pushPlane(u_ptr, u_stride, u_size); + + // V + const uint32_t v_stride = chroma_w; + const uint32_t v_size = chroma_w * chroma_h; + uintptr_t v_ptr = u_ptr + u_size; + pushPlane(v_ptr, v_stride, v_size); + + // A (full res) + const uint32_t a_stride = w; + const uint32_t a_size = w * h; + uintptr_t a_ptr = v_ptr + v_size; + pushPlane(a_ptr, a_stride, a_size); + break; + } + + case VideoBufferType::I422: { + const uint32_t chroma_w = (w + 1) / 2; + + // Y + const uint32_t y_stride = w; + const uint32_t y_size = w * h; + uintptr_t y_ptr = base; + pushPlane(y_ptr, y_stride, y_size); + + // U + const uint32_t u_stride = chroma_w; + const uint32_t u_size = chroma_w * h; + uintptr_t u_ptr = y_ptr + y_size; + pushPlane(u_ptr, u_stride, u_size); + + // V + const uint32_t v_stride = chroma_w; + const uint32_t v_size = chroma_w * h; + uintptr_t v_ptr = u_ptr + u_size; + pushPlane(v_ptr, v_stride, v_size); + break; + } + + case VideoBufferType::I444: { + // All planes full-res + const uint32_t y_stride = w; + const uint32_t y_size = w * h; + uintptr_t y_ptr = base; + pushPlane(y_ptr, y_stride, y_size); + + const uint32_t u_stride = w; + const uint32_t u_size = w * h; + uintptr_t u_ptr = y_ptr + y_size; + pushPlane(u_ptr, u_stride, u_size); + + const uint32_t v_stride = w; + const uint32_t v_size = w * h; + uintptr_t v_ptr = u_ptr + u_size; + pushPlane(v_ptr, v_stride, v_size); + break; + } + + case VideoBufferType::I010: { + // 16-bit per sample + const uint32_t chroma_w = (w + 1) / 2; + const uint32_t chroma_h = (h + 1) / 2; + + // Y + const uint32_t y_stride = w * 2; + const uint32_t y_size = w * h * 2; + uintptr_t y_ptr = base; + pushPlane(y_ptr, y_stride, y_size); + + // U + const uint32_t u_stride = chroma_w * 2; + const uint32_t u_size = chroma_w * chroma_h * 2; + uintptr_t u_ptr = y_ptr + y_size; + pushPlane(u_ptr, u_stride, u_size); + + // V + const uint32_t v_stride = chroma_w * 2; + const uint32_t v_size = chroma_w * chroma_h * 2; + uintptr_t v_ptr = u_ptr + u_size; 
+    pushPlane(v_ptr, v_stride, v_size);
+    break;
+  }
+
+  case VideoBufferType::NV12: {
+    const uint32_t chroma_w = (w + 1) / 2;
+    const uint32_t chroma_h = (h + 1) / 2;
+
+    // Y
+    const uint32_t y_stride = w;
+    const uint32_t y_size = w * h;
+    uintptr_t y_ptr = base;
+    pushPlane(y_ptr, y_stride, y_size);
+
+    // UV interleaved
+    const uint32_t uv_stride = chroma_w * 2;
+    const uint32_t uv_size = chroma_w * chroma_h * 2;
+    uintptr_t uv_ptr = y_ptr + y_size;
+    pushPlane(uv_ptr, uv_stride, uv_size);
+    break;
+  }
+
+  default:
+    // Unknown or unsupported -> no planes
+    break;
+  }
+
+  return planes;
+}
+
+} // namespace
+
+// ----------------------------------------------------------------------------
+// LKVideoFrame implementation
+// ----------------------------------------------------------------------------
+
+// Builds a frame that takes ownership of an existing pixel buffer.
+//
+// `data` is moved into the frame. It may be larger than required, but it must
+// hold at least computeBufferSize(width, height, type) bytes.
+//
+// Throws std::invalid_argument when `data` is too small for the requested
+// format and dimensions.
+LKVideoFrame::LKVideoFrame(int width, int height, VideoBufferType type,
+                           std::vector<std::uint8_t> data)
+    : width_(width), height_(height), type_(type), data_(std::move(data)) {
+  const std::size_t expected = computeBufferSize(width_, height_, type_);
+  if (data_.size() < expected) {
+    throw std::invalid_argument("LKVideoFrame: provided data is too small for "
+                                "the specified format and size");
+  }
+  // This constructor runs for every created, converted or received frame, so
+  // it must not write diagnostics to stdout.
+}
+/*
+LKVideoFrame& LKVideoFrame::operator=(LKVideoFrame&& other) noexcept {
+  // 1. Self-assignment check
+  if (this == &other) {
+    return *this;
+  }
+
+  // 2. Resource cleanup (The std::vector handles its own memory cleanup,
+  // but the simple members must be transferred.)
+
+  // 3. Transfer simple members (width, height, type)
+  width_ = other.width_;
+  height_ = other.height_;
+  type_ = other.type_;
+
+  // 4. Transfer complex resource (std::vector)
+  // std::move() is used to invoke std::vector's move assignment operator
+  data_ = std::move(other.data_);
+
+  // 5. Optional: Reset the 'other' object to a valid but empty/default state.
+  // This is good practice for the object that has been moved-from.
+  other.width_ = 0;
+  other.height_ = 0;
+  // other.data_ is already empty after the move assignment
+
+  // 6. Return reference to the assigned object
+  return *this;
+}*/
+
+// Allocates a zero-filled frame whose buffer is exactly
+// computeBufferSize(width, height, type) bytes long.
+LKVideoFrame LKVideoFrame::create(int width, int height, VideoBufferType type) {
+  const std::size_t size = computeBufferSize(width, height, type);
+  std::vector<std::uint8_t> buffer(size, 0);
+  return LKVideoFrame(width, height, type, std::move(buffer));
+}
+
+// Describes each plane of this frame via computePlaneInfos().
+//
+// The plane pointers are derived from the address of this frame's own buffer,
+// so they are only meaningful while the frame is alive and its storage is not
+// replaced. Returns an empty vector when the frame holds no data.
+std::vector<VideoPlaneInfo> LKVideoFrame::planeInfos() const {
+  if (data_.empty()) {
+    return {};
+  }
+
+  uintptr_t base = reinterpret_cast<uintptr_t>(data_.data());
+  return computePlaneInfos(base, width_, height_, type_);
+}
+
+// Returns a new frame holding this frame's pixels in format `dst`, optionally
+// flipped vertically. The source frame is left untouched.
+LKVideoFrame LKVideoFrame::convert(VideoBufferType dst, bool flip_y) const {
+  // Fast path: same format, no flip -> just clone the buffer.
+  // We still return a *new* LKVideoFrame, never `*this`, so copy-ctor
+  // being deleted is not a problem.
+  if (dst == type_ && !flip_y) {
+    std::cerr << "LKVideoFrame::convert Warning: converting to the same format"
+              << std::endl;
+    // The constructor takes its buffer by value, so passing data_ copies the
+    // pixel data exactly once.
+    return LKVideoFrame(width_, height_, type_, data_);
+  }
+
+  // General path: delegate to the FFI-based conversion helper.
+  // This returns a brand new LKVideoFrame (move-constructed / elided).
+  return convertViaFfi(*this, dst, flip_y);
+}
+
+} // namespace livekit
diff --git a/src/video_source.cpp b/src/video_source.cpp
new file mode 100644
index 0000000..1caddc9
--- /dev/null
+++ b/src/video_source.cpp
@@ -0,0 +1,68 @@
+/*
+ * Copyright 2025 LiveKit
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an “AS IS” BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "livekit/video_source.h"
+
+#include <cstdint>
+#include <stdexcept>
+
+#include "livekit/ffi_client.h"
+#include "livekit/video_frame.h"
+
+#include "ffi.pb.h"
+#include "video_frame.pb.h"
+#include "video_utils.h"
+
+namespace livekit {
+
+// Asks the FFI layer for a new native video source of the given resolution
+// and stores the handle it returns.
+//
+// Throws std::runtime_error when the response carries no new_video_source.
+VideoSource::VideoSource(int width, int height)
+    : width_(width), height_(height) {
+  proto::FfiRequest request;
+  auto *new_source = request.mutable_new_video_source();
+  new_source->set_type(proto::VideoSourceType::VIDEO_SOURCE_NATIVE);
+  auto *resolution = new_source->mutable_resolution();
+  resolution->set_width(width_);
+  resolution->set_height(height_);
+
+  const auto response = FfiClient::instance().sendRequest(request);
+  if (!response.has_new_video_source()) {
+    throw std::runtime_error("VideoSource: missing new_video_source");
+  }
+
+  handle_ = FfiHandle(response.new_video_source().source().handle().id());
+}
+
+// Submits one frame to this source through the FFI layer.
+//
+// Does nothing when the source has no valid handle. The buffer description is
+// built by toProto(frame). Throws std::runtime_error when the response carries
+// no capture_video_frame.
+void VideoSource::captureFrame(const LKVideoFrame &frame,
+                               std::int64_t timestamp_us,
+                               VideoRotation rotation) {
+  if (!handle_) {
+    return;
+  }
+
+  const proto::VideoBufferInfo buffer_info = toProto(frame);
+
+  proto::FfiRequest request;
+  auto *capture = request.mutable_capture_video_frame();
+  capture->set_source_handle(handle_.get());
+  capture->mutable_buffer()->CopyFrom(buffer_info);
+  capture->set_timestamp_us(timestamp_us);
+  capture->set_rotation(static_cast<proto::VideoRotation>(rotation));
+
+  const proto::FfiResponse response =
+      FfiClient::instance().sendRequest(request);
+  if (!response.has_capture_video_frame()) {
+    throw std::runtime_error("FfiResponse missing capture_video_frame");
+  }
+}
+
+} // namespace livekit
\ No newline at end of file
diff --git a/src/video_utils.cpp b/src/video_utils.cpp
new file mode 100644
index 0000000..be1470f
--- /dev/null
+++ b/src/video_utils.cpp
@@ -0,0 +1,173 @@
+#include "livekit/video_frame.h"
+
+#include <cstdint>
+#include <cstring>
+#include <stdexcept>
+
+#include "ffi.pb.h"
+#include "livekit/ffi_client.h"
+#include "livekit/ffi_handle.h"
+#include "video_frame.pb.h"
+
+namespace livekit {
+
+namespace {
+
+// Map SDK enum -> proto enum.
+// Throws std::runtime_error for a value with no proto counterpart.
+proto::VideoBufferType toProtoBufferType(VideoBufferType t) {
+  switch (t) {
+  case VideoBufferType::ARGB:
+    return proto::VideoBufferType::ARGB;
+  case VideoBufferType::ABGR:
+    return proto::VideoBufferType::ABGR;
+  case VideoBufferType::RGBA:
+    return proto::VideoBufferType::RGBA;
+  case VideoBufferType::BGRA:
+    return proto::VideoBufferType::BGRA;
+  case VideoBufferType::RGB24:
+    return proto::VideoBufferType::RGB24;
+  case VideoBufferType::I420:
+    return proto::VideoBufferType::I420;
+  case VideoBufferType::I420A:
+    return proto::VideoBufferType::I420A;
+  case VideoBufferType::I422:
+    return proto::VideoBufferType::I422;
+  case VideoBufferType::I444:
+    return proto::VideoBufferType::I444;
+  case VideoBufferType::I010:
+    return proto::VideoBufferType::I010;
+  case VideoBufferType::NV12:
+    return proto::VideoBufferType::NV12;
+  default:
+    throw std::runtime_error("Unknown VideoBufferType in toProtoBufferType");
+  }
+}
+
+// Map proto enum -> SDK enum.
+// Throws std::runtime_error for a value with no SDK counterpart.
+VideoBufferType fromProtoBufferType(proto::VideoBufferType t) {
+  switch (t) {
+  case proto::VideoBufferType::ARGB:
+    return VideoBufferType::ARGB;
+  case proto::VideoBufferType::ABGR:
+    return VideoBufferType::ABGR;
+  case proto::VideoBufferType::RGBA:
+    return VideoBufferType::RGBA;
+  case proto::VideoBufferType::BGRA:
+    return VideoBufferType::BGRA;
+  case proto::VideoBufferType::RGB24:
+    return VideoBufferType::RGB24;
+  case proto::VideoBufferType::I420:
+    return VideoBufferType::I420;
+  case proto::VideoBufferType::I420A:
+    return VideoBufferType::I420A;
+  case proto::VideoBufferType::I422:
+    return VideoBufferType::I422;
+  case proto::VideoBufferType::I444:
+    return VideoBufferType::I444;
+  case proto::VideoBufferType::I010:
+    return VideoBufferType::I010;
+  case proto::VideoBufferType::NV12:
+    return VideoBufferType::NV12;
+  default:
+    throw std::runtime_error(
+        "Unknown proto::VideoBufferType in fromProtoBufferType");
+  }
+}
+
+} // namespace
+
+// Describes `frame` as a proto::VideoBufferInfo for the FFI layer.
+//
+// No pixel data is copied: the message carries the raw address of the frame's
+// buffer and of each plane, so `frame` must stay alive and unmodified for as
+// long as the returned message is in use.
+//
+// Throws std::runtime_error when the frame's type has no proto counterpart.
+proto::VideoBufferInfo toProto(const LKVideoFrame &frame) {
+  proto::VideoBufferInfo info;
+
+  const int w = frame.width();
+  const int h = frame.height();
+  info.set_width(w);
+  info.set_height(h);
+  info.set_type(toProtoBufferType(frame.type()));
+
+  // Backing data pointer for the whole buffer
+  const auto base_ptr = reinterpret_cast<std::uint64_t>(frame.data());
+  info.set_data_ptr(base_ptr);
+
+  // Compute plane layout for the current format
+  const auto planes = frame.planeInfos();
+  for (const auto &plane : planes) {
+    auto *cmpt = info.add_components();
+    cmpt->set_data_ptr(static_cast<std::uint64_t>(plane.data_ptr));
+    cmpt->set_stride(plane.stride);
+    cmpt->set_size(plane.size);
+  }
+
+  // Stride for main packed formats (matches Python logic)
+  std::uint32_t stride = 0;
+  switch (frame.type()) {
+  case VideoBufferType::ARGB:
+  case VideoBufferType::ABGR:
+  case VideoBufferType::RGBA:
+  case VideoBufferType::BGRA:
+    stride = static_cast<std::uint32_t>(w) * 4;
+    break;
+  case VideoBufferType::RGB24:
+    stride = static_cast<std::uint32_t>(w) * 3;
+    break;
+  default:
+    stride = 0; // not used / unknown for planar formats
+    break;
+  }
+  info.set_stride(stride);
+  return info;
+}
+
+// Copies an FFI-owned video buffer into a newly allocated LKVideoFrame and
+// releases the FFI handle that owns the source buffer.
+//
+// Throws std::runtime_error when the buffer type is unknown or the buffer's
+// data pointer is null. The FFI handle is released on those paths too.
+LKVideoFrame fromOwnedProto(const proto::OwnedVideoBuffer &owned) {
+  // Adopt the owned handle before anything can throw. Its destructor disposes
+  // the handle via FFI, so the core-side buffer is freed on every exit path,
+  // and on success only after the copy below has finished.
+  FfiHandle owned_handle(owned.handle().id());
+
+  const auto &info = owned.info();
+
+  const int width = static_cast<int>(info.width());
+  const int height = static_cast<int>(info.height());
+  const VideoBufferType type = fromProtoBufferType(info.type());
+
+  const auto src_ptr = info.data_ptr();
+  if (src_ptr == 0) {
+    throw std::runtime_error("fromOwnedProto: info.data_ptr is null");
+  }
+  const auto *src = reinterpret_cast<const std::uint8_t *>(src_ptr);
+
+  // Allocate a new LKVideoFrame with the correct size/format
+  LKVideoFrame frame = LKVideoFrame::create(width, height, type);
+
+  // Copy from the FFI-provided buffer into our own backing storage.
+  // NOTE(review): this copies frame.dataSize() bytes, which assumes the FFI
+  // buffer is at least that large and laid out contiguously the same way as
+  // our own buffers -- confirm against the strides/sizes the core reports.
+  std::memcpy(frame.data(), src, frame.dataSize());
+
+  return frame;
+}
+
+// Converts `frame` to format `dst` (optionally flipped vertically) by sending
+// a video_convert request through the FFI client, and returns the result as a
+// new frame.
+//
+// Throws std::runtime_error when the response carries no video_convert, when
+// it reports an error, or when the resulting buffer cannot be imported.
+LKVideoFrame convertViaFfi(const LKVideoFrame &frame, VideoBufferType dst,
+                           bool flip_y) {
+  proto::FfiRequest req;
+  auto *vc = req.mutable_video_convert();
+  vc->set_flip_y(flip_y);
+  vc->set_dst_type(toProtoBufferType(dst));
+  vc->mutable_buffer()->CopyFrom(toProto(frame));
+
+  proto::FfiResponse resp = FfiClient::instance().sendRequest(req);
+  if (!resp.has_video_convert()) {
+    throw std::runtime_error(
+        "convertViaFfi: FfiResponse missing video_convert");
+  }
+  const auto &vc_resp = resp.video_convert();
+  if (!vc_resp.error().empty()) {
+    throw std::runtime_error("convertViaFfi: " + vc_resp.error());
+  }
+  // vc_resp.buffer() is an OwnedVideoBuffer
+  return fromOwnedProto(vc_resp.buffer());
+}
+
+} // namespace livekit
diff --git a/src/video_utils.h b/src/video_utils.h
new file mode 100644
index 0000000..5ece8f2
--- /dev/null
+++ b/src/video_utils.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2025 LiveKit
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an “AS IS” BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "livekit/video_frame.h"
+#include "video_frame.pb.h"
+
+namespace livekit {
+
+// Video FFI Utils
+//
+// Helpers that translate between the SDK's LKVideoFrame and the protobuf
+// messages exchanged with the FFI layer.
+
+// Describes `frame` as a proto::VideoBufferInfo. The message stores the
+// address of the frame's buffer rather than a copy of the pixels, so `frame`
+// must outlive every use of the returned message.
+proto::VideoBufferInfo toProto(const LKVideoFrame &frame);
+
+// Copies the pixels of an FFI-owned buffer into a newly allocated
+// LKVideoFrame, then disposes the FFI handle that owns the source buffer.
+// Throws std::runtime_error when the buffer's data pointer is null or its
+// type is unknown.
+LKVideoFrame fromOwnedProto(const proto::OwnedVideoBuffer &owned);
+
+// Converts `frame` to format `dst`, optionally flipped vertically, by sending
+// a video_convert request through the FFI client. Returns the result as a new
+// frame. Throws std::runtime_error when the response is missing or reports an
+// error.
+LKVideoFrame convertViaFfi(const LKVideoFrame &frame, VideoBufferType dst,
+                           bool flip_y);
+
+} // namespace livekit