Skip to content

Commit 3fd7731

Browse files
authored
Add audio renderer API (#1004)
Public API to get audio frames if the user desires.
1 parent 12c11de commit 3fd7731

12 files changed

Lines changed: 546 additions & 125 deletions

File tree

.changes/audio-renderer-api

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
minor type="added" "Add audio renderer API for receiving raw audio frames"

android/src/main/kotlin/io/livekit/plugin/AudioRenderer.kt

Lines changed: 59 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -110,12 +110,9 @@ class AudioRenderer(
110110
}
111111

112112
/**
113-
* Converts audio data to raw interleaved bytes.
113+
* Converts audio data to raw interleaved bytes with resampling.
114114
*
115-
* If source and target channel counts match, data is copied directly.
116-
* If target requests fewer channels, the first channels are kept and interleaved.
117-
*
118-
* Sends raw byte arrays instead of boxed sample lists.
115+
* Pipeline: read int16 → resample → channel reduce → format convert (int16/float32)
119116
*/
120117
private fun convertAudioData(
121118
audioData: ByteBuffer,
@@ -138,16 +135,7 @@ class AudioRenderer(
138135
return null
139136
}
140137

141-
val bytesPerSample = 2 // 16-bit
142-
val bytesPerFrame = numberOfChannels * bytesPerSample
143-
if (bytesPerFrame <= 0) {
144-
logDroppedFrame("Invalid bytesPerFrame: $bytesPerFrame")
145-
return null
146-
}
147-
148-
val requestedChannels = targetFormat.numberOfChannels.coerceAtLeast(1)
149-
val outChannels = requestedChannels.coerceAtMost(numberOfChannels)
150-
138+
val bytesPerFrame = numberOfChannels * 2
151139
val buffer = audioData.duplicate()
152140
buffer.order(ByteOrder.LITTLE_ENDIAN)
153141
buffer.rewind()
@@ -159,7 +147,7 @@ class AudioRenderer(
159147
}
160148

161149
val expectedBytes = numberOfFrames.toLong() * bytesPerFrame.toLong()
162-
val frameLength = if (expectedBytes <= availableBytes.toLong()) {
150+
val srcFrames = if (expectedBytes <= availableBytes.toLong()) {
163151
numberOfFrames
164152
} else {
165153
val availableFrames = availableBytes / bytesPerFrame
@@ -173,24 +161,71 @@ class AudioRenderer(
173161
availableFrames
174162
}
175163

164+
// Step 1: Read source int16 samples into ShortArray
165+
val src = ShortArray(srcFrames * numberOfChannels)
166+
for (i in src.indices) {
167+
src[i] = buffer.short
168+
}
169+
170+
// Step 2: Resample to target sample rate
171+
val resampleResult = AudioResampler.resample(
172+
src, srcFrames, sampleRate, targetFormat.sampleRate, numberOfChannels
173+
)
174+
val resampled = resampleResult.samples
175+
val outFrames = resampleResult.frameCount
176+
177+
if (outFrames <= 0) {
178+
logDroppedFrame("Resampled frame count is 0")
179+
return null
180+
}
181+
182+
// Step 3: Channel reduction + format conversion
183+
val requestedChannels = targetFormat.numberOfChannels.coerceAtLeast(1)
184+
val outChannels = requestedChannels.coerceAtMost(numberOfChannels)
185+
176186
val result = mutableMapOf<String, Any>(
177-
"sampleRate" to sampleRate,
187+
"sampleRate" to targetFormat.sampleRate,
178188
"channels" to outChannels,
179-
"frameLength" to frameLength,
189+
"frameLength" to outFrames,
180190
)
181191

182192
when (targetFormat.commonFormat) {
183-
"int16" -> {
184-
result["commonFormat"] = "int16"
185-
result["data"] = extractAsInt16Bytes(buffer, numberOfChannels, outChannels, frameLength)
186-
}
187193
"float32" -> {
188194
result["commonFormat"] = "float32"
189-
result["data"] = extractAsFloat32Bytes(buffer, numberOfChannels, outChannels, frameLength)
195+
val out = ByteArray(outFrames * outChannels * 4)
196+
val outBuf = ByteBuffer.wrap(out).order(ByteOrder.LITTLE_ENDIAN)
197+
for (f in 0 until outFrames) {
198+
for (ch in 0 until outChannels) {
199+
val sample = resampled[f * numberOfChannels + ch].toFloat() / Short.MAX_VALUE
200+
outBuf.putFloat((f * outChannels + ch) * 4, sample)
201+
}
202+
}
203+
result["data"] = out
190204
}
191205
else -> {
192206
result["commonFormat"] = "int16"
193-
result["data"] = extractAsInt16Bytes(buffer, numberOfChannels, outChannels, frameLength)
207+
if (outChannels == numberOfChannels) {
208+
// Fast path: no channel reduction — bulk copy resampled data
209+
val out = ByteArray(outFrames * outChannels * 2)
210+
val outBuf = ByteBuffer.wrap(out).order(ByteOrder.LITTLE_ENDIAN)
211+
for (i in 0 until outFrames * outChannels) {
212+
outBuf.putShort(i * 2, resampled[i])
213+
}
214+
result["data"] = out
215+
} else {
216+
// Channel reduction: keep first outChannels
217+
val out = ByteArray(outFrames * outChannels * 2)
218+
val outBuf = ByteBuffer.wrap(out).order(ByteOrder.LITTLE_ENDIAN)
219+
for (f in 0 until outFrames) {
220+
for (ch in 0 until outChannels) {
221+
outBuf.putShort(
222+
(f * outChannels + ch) * 2,
223+
resampled[f * numberOfChannels + ch]
224+
)
225+
}
226+
}
227+
result["data"] = out
228+
}
194229
}
195230
}
196231

@@ -203,74 +238,6 @@ class AudioRenderer(
203238
Log.w(TAG, "Dropping audio frame #$droppedFrameCount for rendererId=$rendererId: $reason")
204239
}
205240
}
206-
207-
/**
208-
* Extracts int16 PCM bytes from an int16 source buffer.
209-
*
210-
* Fast path when channel counts match (direct copy).
211-
* Otherwise keeps only the first [outChannels] channels, interleaved.
212-
*/
213-
private fun extractAsInt16Bytes(
214-
buffer: ByteBuffer,
215-
srcChannels: Int,
216-
outChannels: Int,
217-
numberOfFrames: Int
218-
): ByteArray {
219-
// Fast path: matching channel count — bulk copy.
220-
if (srcChannels == outChannels) {
221-
val totalBytes = numberOfFrames * outChannels * 2
222-
val out = ByteArray(totalBytes)
223-
buffer.get(out, 0, totalBytes.coerceAtMost(buffer.remaining()))
224-
return out
225-
}
226-
227-
// Channel reduction: keep first outChannels.
228-
val out = ByteArray(numberOfFrames * outChannels * 2)
229-
val outBuf = ByteBuffer.wrap(out).order(ByteOrder.LITTLE_ENDIAN)
230-
231-
for (frame in 0 until numberOfFrames) {
232-
val srcOffset = frame * srcChannels * 2
233-
for (ch in 0 until outChannels) {
234-
val byteIndex = srcOffset + ch * 2
235-
if (byteIndex + 1 < buffer.capacity()) {
236-
buffer.position(byteIndex)
237-
outBuf.putShort((frame * outChannels + ch) * 2, buffer.short)
238-
}
239-
}
240-
}
241-
242-
return out
243-
}
244-
245-
/**
246-
* Converts int16 PCM source to float32 bytes.
247-
*
248-
* Each int16 sample is scaled to the [-1.0, 1.0] range.
249-
* Only the first [outChannels] channels are kept.
250-
*/
251-
private fun extractAsFloat32Bytes(
252-
buffer: ByteBuffer,
253-
srcChannels: Int,
254-
outChannels: Int,
255-
numberOfFrames: Int
256-
): ByteArray {
257-
val out = ByteArray(numberOfFrames * outChannels * 4)
258-
val outBuf = ByteBuffer.wrap(out).order(ByteOrder.LITTLE_ENDIAN)
259-
260-
for (frame in 0 until numberOfFrames) {
261-
val srcOffset = frame * srcChannels * 2
262-
for (ch in 0 until outChannels) {
263-
val byteIndex = srcOffset + ch * 2
264-
if (byteIndex + 1 < buffer.capacity()) {
265-
buffer.position(byteIndex)
266-
val sampleFloat = buffer.short.toFloat() / Short.MAX_VALUE
267-
outBuf.putFloat((frame * outChannels + ch) * 4, sampleFloat)
268-
}
269-
}
270-
}
271-
272-
return out
273-
}
274241
}
275242

276243
/**
Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
/*
2+
* Copyright 2024 LiveKit, Inc.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package io.livekit.plugin
18+
19+
/**
20+
* Pure audio resampler for interleaved int16 PCM data.
21+
*
22+
* - Same rate: passthrough (returns input array as-is)
23+
* - Upsampling: linear interpolation between adjacent samples
24+
* - Downsampling: box filter (averages source samples per output sample) to prevent aliasing
25+
*/
26+
object AudioResampler {
27+
28+
/**
29+
* Resample interleaved int16 PCM audio.
30+
*
31+
* @param src Interleaved int16 samples (channels interleaved per frame)
32+
* @param srcFrames Number of frames in [src] (total samples = srcFrames * channels)
33+
* @param srcRate Source sample rate in Hz
34+
* @param targetRate Target sample rate in Hz
35+
* @param channels Number of interleaved channels
36+
* @return Resampled interleaved int16 samples. Returns [src] unchanged when rates match.
37+
*/
38+
fun resample(
39+
src: ShortArray,
40+
srcFrames: Int,
41+
srcRate: Int,
42+
targetRate: Int,
43+
channels: Int
44+
): ResampleResult {
45+
if (srcRate == targetRate || srcFrames <= 0 || channels <= 0) {
46+
return ResampleResult(src, srcFrames)
47+
}
48+
49+
val outFrames = ((srcFrames.toLong() * targetRate) / srcRate).toInt()
50+
if (outFrames <= 0) {
51+
return ResampleResult(ShortArray(0), 0)
52+
}
53+
54+
val resampled = if (targetRate > srcRate) {
55+
upsample(src, srcFrames, outFrames, channels)
56+
} else {
57+
downsample(src, srcFrames, outFrames, srcRate, targetRate, channels)
58+
}
59+
60+
return ResampleResult(resampled, outFrames)
61+
}
62+
63+
/**
64+
* Linear interpolation upsampling.
65+
*/
66+
private fun upsample(
67+
src: ShortArray,
68+
srcFrames: Int,
69+
outFrames: Int,
70+
channels: Int
71+
): ShortArray {
72+
val out = ShortArray(outFrames * channels)
73+
74+
// Edge case: single source frame — just repeat it
75+
if (srcFrames <= 1) {
76+
for (f in 0 until outFrames) {
77+
for (ch in 0 until channels) {
78+
out[f * channels + ch] = src[ch]
79+
}
80+
}
81+
return out
82+
}
83+
84+
val ratio = srcFrames.toDouble() / outFrames.toDouble()
85+
86+
for (f in 0 until outFrames) {
87+
val srcPos = f * ratio
88+
val idx = srcPos.toInt().coerceAtMost(srcFrames - 2)
89+
val frac = (srcPos - idx).toFloat()
90+
91+
for (ch in 0 until channels) {
92+
val s0 = src[idx * channels + ch]
93+
val s1 = src[(idx + 1) * channels + ch]
94+
out[f * channels + ch] = (s0 + frac * (s1 - s0)).toInt()
95+
.coerceIn(Short.MIN_VALUE.toInt(), Short.MAX_VALUE.toInt())
96+
.toShort()
97+
}
98+
}
99+
100+
return out
101+
}
102+
103+
/**
104+
* Box filter downsampling. Averages all source samples that map to each
105+
* output sample, acting as a low-pass filter to prevent aliasing.
106+
*/
107+
private fun downsample(
108+
src: ShortArray,
109+
srcFrames: Int,
110+
outFrames: Int,
111+
srcRate: Int,
112+
targetRate: Int,
113+
channels: Int
114+
): ShortArray {
115+
val out = ShortArray(outFrames * channels)
116+
val ratio = srcRate.toDouble() / targetRate.toDouble()
117+
118+
for (f in 0 until outFrames) {
119+
val srcStart = (f * ratio).toInt()
120+
val srcEnd = ((f + 1) * ratio).toInt().coerceAtMost(srcFrames)
121+
122+
for (ch in 0 until channels) {
123+
var sum = 0L
124+
for (i in srcStart until srcEnd) {
125+
sum += src[i * channels + ch]
126+
}
127+
val count = srcEnd - srcStart
128+
out[f * channels + ch] = if (count > 0) {
129+
(sum / count).toInt()
130+
.coerceIn(Short.MIN_VALUE.toInt(), Short.MAX_VALUE.toInt())
131+
.toShort()
132+
} else {
133+
0
134+
}
135+
}
136+
}
137+
138+
return out
139+
}
140+
141+
data class ResampleResult(val samples: ShortArray, val frameCount: Int)
142+
}

0 commit comments

Comments
 (0)