Skip to content

Commit 7aea06f

Browse files
committed
Audio: Level Multiplier: Add HiFi3/HiFi4 processing version
This patch adds the HiFi3 and HiFi4 optimized processing. The MCPS performance for stereo 48 kHz audio, as reported by process_test('level_multiplier', N, N, 48e3, 0, 1, 'xt-run --mem_model'), is 2.50 / 2.68 / 2.68 for the S16_LE / S24_LE / S32_LE formats when processing with non-unity gain. Signed-off-by: Seppo Ingalsuo <seppo.ingalsuo@linux.intel.com>
1 parent 99637fa commit 7aea06f

File tree

4 files changed

+346
-0
lines changed

4 files changed

+346
-0
lines changed

src/audio/level_multiplier/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ if(CONFIG_COMP_LEVEL_MULTIPLIER STREQUAL "m")
66
else()
77
add_local_sources(sof level_multiplier.c)
88
add_local_sources(sof level_multiplier-generic.c)
9+
add_local_sources(sof level_multiplier-hifi3.c)
910

1011
if(CONFIG_IPC_MAJOR_4)
1112
add_local_sources(sof level_multiplier-ipc4.c)

src/audio/level_multiplier/level_multiplier-generic.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
#define LEVEL_MULTIPLIER_S24_SHIFT Q_SHIFT_BITS_64(23, LEVEL_MULTIPLIER_QXY_Y, 23)
1515
#define LEVEL_MULTIPLIER_S32_SHIFT Q_SHIFT_BITS_64(31, LEVEL_MULTIPLIER_QXY_Y, 31)
1616

17+
#if SOF_USE_HIFI(NONE, VOLUME)
18+
1719
#if CONFIG_FORMAT_S16LE
1820
/**
1921
* level_multiplier_s16() - Process S16_LE format.
@@ -260,3 +262,5 @@ level_multiplier_func level_multiplier_find_proc_func(enum sof_ipc_frame src_fmt
260262

261263
return NULL;
262264
}
265+
266+
#endif /* SOF_USE_HIFI(NONE, VOLUME) */
Lines changed: 340 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,340 @@
1+
// SPDX-License-Identifier: BSD-3-Clause
2+
//
3+
// Copyright(c) 2025 Intel Corporation.
4+
5+
#include <sof/audio/module_adapter/module/generic.h>
6+
#include <sof/audio/component.h>
7+
#include <sof/audio/sink_api.h>
8+
#include <sof/audio/sink_source_utils.h>
9+
#include <sof/audio/source_api.h>
10+
#include <stdint.h>
11+
#include "level_multiplier.h"
12+
13+
#define LEVEL_MULTIPLIER_S32_SHIFT 8 /* See explanation from level_multiplier_s32() */
14+
15+
#if SOF_USE_MIN_HIFI(3, VOLUME)
16+
17+
#include <xtensa/tie/xt_hifi3.h>
18+
19+
#if CONFIG_FORMAT_S16LE
20+
/**
21+
* level_multiplier_s16() - Process S16_LE format.
22+
* @mod: Pointer to module data.
23+
* @source: Source for PCM samples data.
24+
* @sink: Sink for PCM samples data.
25+
* @frames: Number of audio data frames to process.
26+
*
27+
* This is the processing function for 16-bit signed integer PCM formats. The
28+
* audio samples are copied from source to sink with gain defined in cd->gain.
29+
*
30+
* Return: Value zero for success, otherwise an error code.
31+
*/
32+
static int level_multiplier_s16(const struct processing_module *mod,
33+
struct sof_source *source,
34+
struct sof_sink *sink,
35+
uint32_t frames)
36+
{
37+
struct level_multiplier_comp_data *cd = module_get_private_data(mod);
38+
ae_valign x_align;
39+
ae_valign y_align = AE_ZALIGN64();
40+
ae_f32x2 samples0;
41+
ae_f32x2 samples1;
42+
const ae_f32x2 gain = cd->gain;
43+
ae_f16x4 samples;
44+
ae_f16x4 const *x;
45+
ae_f16x4 *y;
46+
int16_t const *x_start, *x_end;
47+
int16_t *y_start, *y_end;
48+
int x_size, y_size;
49+
int source_samples_without_wrap;
50+
int samples_without_wrap;
51+
int remaining_samples = frames * cd->channels;
52+
int bytes = frames * cd->frame_bytes;
53+
int ret;
54+
int n, i;
55+
56+
ret = source_get_data_s16(source, bytes, (const int16_t **)&x, &x_start, &x_size);
57+
if (ret)
58+
return ret;
59+
60+
/* Similarly get pointer to sink data in circular buffer, buffer start and size. */
61+
ret = sink_get_buffer_s16(sink, bytes, (int16_t **)&y, &y_start, &y_size);
62+
if (ret)
63+
return ret;
64+
65+
/* Set helper pointers to buffer end for wrap check. Then loop until all
66+
* samples are processed.
67+
*/
68+
x_end = x_start + x_size;
69+
y_end = y_start + y_size;
70+
while (remaining_samples) {
71+
/* Find out samples to process before first wrap or end of data. */
72+
source_samples_without_wrap = x_end - (int16_t *)x;
73+
samples_without_wrap = y_end - (int16_t *)y;
74+
samples_without_wrap = MIN(samples_without_wrap, source_samples_without_wrap);
75+
samples_without_wrap = MIN(samples_without_wrap, remaining_samples);
76+
x_align = AE_LA64_PP(x);
77+
78+
/* Process with 64 bit loads and stores */
79+
n = samples_without_wrap >> 2;
80+
for (i = 0; i < n; i++) {
81+
AE_LA16X4_IP(samples, x_align, x);
82+
83+
/* Multiply the input sample */
84+
samples0 = AE_MULFP32X16X2RS_H(gain, samples);
85+
samples1 = AE_MULFP32X16X2RS_L(gain, samples);
86+
87+
/* Q9.23 to Q1.31 */
88+
samples0 = AE_SLAI32S(samples0, 8);
89+
samples1 = AE_SLAI32S(samples1, 8);
90+
91+
/* To Q1.15 */
92+
samples = AE_ROUND16X4F32SSYM(samples0, samples1);
93+
AE_SA16X4_IP(samples, y_align, y);
94+
}
95+
96+
AE_SA64POS_FP(y_align, y);
97+
n = samples_without_wrap - (n << 2);
98+
for (i = 0; i < n; i++) {
99+
AE_L16_IP(samples, (ae_f16 *)x, sizeof(ae_f16));
100+
samples0 = AE_MULFP32X16X2RS_H(gain, samples);
101+
samples0 = AE_SLAI32S(samples0, 8);
102+
samples = AE_ROUND16X4F32SSYM(samples0, samples0);
103+
AE_S16_0_IP(samples, (ae_f16 *)y, sizeof(ae_f16));
104+
}
105+
106+
/* One of the buffers needs a wrap (or end of data), so check for wrap */
107+
x = (x >= (ae_f16x4 *)x_end) ? x - x_size : x;
108+
y = (y >= (ae_f16x4 *)y_end) ? y - y_size : y;
109+
remaining_samples -= samples_without_wrap;
110+
}
111+
112+
/* Update the source and sink for bytes consumed and produced. Return success. */
113+
source_release_data(source, bytes);
114+
sink_commit_buffer(sink, bytes);
115+
return 0;
116+
}
117+
#endif /* CONFIG_FORMAT_S16LE */
118+
119+
#if CONFIG_FORMAT_S24LE
120+
/**
121+
* level_multiplier_s24() - Process S24_4LE format.
122+
* @mod: Pointer to module data.
123+
* @source: Source for PCM samples data.
124+
* @sink: Sink for PCM samples data.
125+
* @frames: Number of audio data frames to process.
126+
*
127+
* This is the processing function for 24-bit signed integer PCM formats. The
128+
* audio samples are copied from source to sink with gain defined in cd->gain.
129+
*
130+
* Return: Value zero for success, otherwise an error code.
131+
*/
132+
static int level_multiplier_s24(const struct processing_module *mod,
133+
struct sof_source *source,
134+
struct sof_sink *sink,
135+
uint32_t frames)
136+
{
137+
struct level_multiplier_comp_data *cd = module_get_private_data(mod);
138+
ae_valign x_align;
139+
ae_valign y_align = AE_ZALIGN64();
140+
const ae_f32x2 gain = cd->gain;
141+
ae_f32x2 samples;
142+
ae_f32x2 const *x;
143+
ae_f32x2 *y;
144+
int32_t const *x_start, *x_end;
145+
int32_t *y_start, *y_end;
146+
int x_size, y_size;
147+
int source_samples_without_wrap;
148+
int samples_without_wrap;
149+
int remaining_samples = frames * cd->channels;
150+
int bytes = frames * cd->frame_bytes;
151+
int ret;
152+
int n, i;
153+
154+
ret = source_get_data_s32(source, bytes, (const int32_t **)&x, &x_start, &x_size);
155+
if (ret)
156+
return ret;
157+
158+
/* Similarly get pointer to sink data in circular buffer, buffer start and size. */
159+
ret = sink_get_buffer_s32(sink, bytes, (int32_t **)&y, &y_start, &y_size);
160+
if (ret)
161+
return ret;
162+
163+
/* Set helper pointers to buffer end for wrap check. Then loop until all
164+
* samples are processed.
165+
*/
166+
x_end = x_start + x_size;
167+
y_end = y_start + y_size;
168+
while (remaining_samples) {
169+
/* Find out samples to process before first wrap or end of data. */
170+
source_samples_without_wrap = x_end - (int32_t *)x;
171+
samples_without_wrap = y_end - (int32_t *)y;
172+
samples_without_wrap = MIN(samples_without_wrap, source_samples_without_wrap);
173+
samples_without_wrap = MIN(samples_without_wrap, remaining_samples);
174+
x_align = AE_LA64_PP(x);
175+
176+
/* Process with 64 bit loads and stores */
177+
n = samples_without_wrap >> 1;
178+
for (i = 0; i < n; i++) {
179+
AE_LA32X2_IP(samples, x_align, x);
180+
samples = AE_MULFP32X2RS(gain, AE_SLAI32(samples, 8));
181+
samples = AE_SLAI32S(samples, 8);
182+
samples = AE_SRAI32(samples, 8);
183+
AE_SA32X2_IP(samples, y_align, y);
184+
}
185+
186+
AE_SA64POS_FP(y_align, y);
187+
if (samples_without_wrap - (n << 1)) {
188+
AE_L32_IP(samples, (ae_f32 *)x, sizeof(ae_f32));
189+
samples = AE_MULFP32X2RS(gain, AE_SLAI32(samples, 8));
190+
samples = AE_SLAI32S(samples, 8);
191+
samples = AE_SRAI32(samples, 8);
192+
AE_S32_L_IP(samples, (ae_f32 *)y, sizeof(ae_f32));
193+
}
194+
195+
/* One of the buffers needs a wrap (or end of data), so check for wrap */
196+
x = (x >= (ae_f32x2 *)x_end) ? x - x_size : x;
197+
y = (y >= (ae_f32x2 *)y_end) ? y - y_size : y;
198+
remaining_samples -= samples_without_wrap;
199+
}
200+
201+
/* Update the source and sink for bytes consumed and produced. Return success. */
202+
source_release_data(source, bytes);
203+
sink_commit_buffer(sink, bytes);
204+
return 0;
205+
}
206+
#endif /* CONFIG_FORMAT_S24LE */
207+
208+
#if CONFIG_FORMAT_S32LE
209+
/**
210+
* level_multiplier_s32() - Process S32_LE format.
211+
* @mod: Pointer to module data.
212+
* @source: Source for PCM samples data.
213+
* @sink: Sink for PCM samples data.
214+
* @frames: Number of audio data frames to process.
215+
*
216+
* This is the processing function for 32-bit signed integer PCM formats. The
217+
* audio samples are copied from source to sink with gain defined in cd->gain.
218+
*
219+
* Return: Value zero for success, otherwise an error code.
220+
*/
221+
static int level_multiplier_s32(const struct processing_module *mod,
222+
struct sof_source *source,
223+
struct sof_sink *sink,
224+
uint32_t frames)
225+
{
226+
struct level_multiplier_comp_data *cd = module_get_private_data(mod);
227+
ae_valign x_align;
228+
ae_valign y_align = AE_ZALIGN64();
229+
ae_f64 mult0;
230+
ae_f64 mult1;
231+
const ae_f32x2 gain = cd->gain;
232+
ae_f32x2 samples;
233+
ae_f32x2 const *x;
234+
ae_f32x2 *y;
235+
int32_t const *x_start, *x_end;
236+
int32_t *y_start, *y_end;
237+
int x_size, y_size;
238+
int source_samples_without_wrap;
239+
int samples_without_wrap;
240+
int remaining_samples = frames * cd->channels;
241+
int bytes = frames * cd->frame_bytes;
242+
int ret;
243+
int n, i;
244+
245+
ret = source_get_data_s32(source, bytes, (const int32_t **)&x, &x_start, &x_size);
246+
if (ret)
247+
return ret;
248+
249+
/* Similarly get pointer to sink data in circular buffer, buffer start and size. */
250+
ret = sink_get_buffer_s32(sink, bytes, (int32_t **)&y, &y_start, &y_size);
251+
if (ret)
252+
return ret;
253+
254+
/* Set helper pointers to buffer end for wrap check. Then loop until all
255+
* samples are processed.
256+
*/
257+
x_end = x_start + x_size;
258+
y_end = y_start + y_size;
259+
while (remaining_samples) {
260+
/* Find out samples to process before first wrap or end of data. */
261+
source_samples_without_wrap = x_end - (int32_t *)x;
262+
samples_without_wrap = y_end - (int32_t *)y;
263+
samples_without_wrap = MIN(samples_without_wrap, source_samples_without_wrap);
264+
samples_without_wrap = MIN(samples_without_wrap, remaining_samples);
265+
x_align = AE_LA64_PP(x);
266+
267+
/* Process with 64 bit loads and stores */
268+
n = samples_without_wrap >> 1;
269+
for (i = 0; i < n; i++) {
270+
AE_LA32X2_IP(samples, x_align, x);
271+
/* Q31 gain would give Q47, then Q23 gain gives Q39, need to shift
272+
* the product left by 8 to get Q47 for round instruction.
273+
*/
274+
mult0 = AE_MULF32R_HH(gain, samples);
275+
mult1 = AE_MULF32R_LL(gain, samples);
276+
mult0 = AE_SLAI64(mult0, LEVEL_MULTIPLIER_S32_SHIFT);
277+
mult1 = AE_SLAI64(mult1, LEVEL_MULTIPLIER_S32_SHIFT);
278+
samples = AE_ROUND32X2F48SSYM(mult0, mult1); /* Q2.47 -> Q1.31 */
279+
AE_SA32X2_IP(samples, y_align, y);
280+
}
281+
282+
AE_SA64POS_FP(y_align, y);
283+
if (samples_without_wrap - (n << 1)) {
284+
AE_L32_IP(samples, (ae_f32 *)x, sizeof(ae_f32));
285+
mult0 = AE_MULF32R_HH(gain, samples);
286+
mult0 = AE_SLAI64(mult0, LEVEL_MULTIPLIER_S32_SHIFT);
287+
samples = AE_ROUND32F48SSYM(mult0);
288+
AE_S32_L_IP(samples, (ae_f32 *)y, sizeof(ae_f32));
289+
}
290+
291+
/* One of the buffers needs a wrap (or end of data), so check for wrap */
292+
x = (x >= (ae_f32x2 *)x_end) ? x - x_size : x;
293+
y = (y >= (ae_f32x2 *)y_end) ? y - y_size : y;
294+
remaining_samples -= samples_without_wrap;
295+
}
296+
297+
/* Update the source and sink for bytes consumed and produced. Return success. */
298+
source_release_data(source, bytes);
299+
sink_commit_buffer(sink, bytes);
300+
return 0;
301+
}
302+
#endif /* CONFIG_FORMAT_S32LE */
303+
304+
/* This struct array defines the used processing functions for
305+
* the PCM formats
306+
*/
307+
const struct level_multiplier_proc_fnmap level_multiplier_proc_fnmap[] = {
308+
#if CONFIG_FORMAT_S16LE
309+
{ SOF_IPC_FRAME_S16_LE, level_multiplier_s16 },
310+
#endif
311+
#if CONFIG_FORMAT_S24LE
312+
{ SOF_IPC_FRAME_S24_4LE, level_multiplier_s24 },
313+
#endif
314+
#if CONFIG_FORMAT_S32LE
315+
{ SOF_IPC_FRAME_S32_LE, level_multiplier_s32 },
316+
#endif
317+
};
318+
319+
/**
320+
* level_multiplier_find_proc_func() - Find suitable processing function.
321+
* @src_fmt: Enum value for PCM format.
322+
*
323+
* This function finds the suitable processing function to use for
324+
* the used PCM format. If not found, return NULL.
325+
*
326+
* Return: Pointer to processing function for the requested PCM format.
327+
*/
328+
level_multiplier_func level_multiplier_find_proc_func(enum sof_ipc_frame src_fmt)
329+
{
330+
int i;
331+
332+
/* Find suitable processing function from map */
333+
for (i = 0; i < ARRAY_SIZE(level_multiplier_proc_fnmap); i++)
334+
if (src_fmt == level_multiplier_proc_fnmap[i].frame_fmt)
335+
return level_multiplier_proc_fnmap[i].level_multiplier_proc_func;
336+
337+
return NULL;
338+
}
339+
340+
#endif /* SOF_USE_MIN_HIFI(3, VOLUME) */

src/audio/level_multiplier/llext/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
sof_llext_build("level_multiplier"
55
SOURCES ../level_multiplier.c
66
../level_multiplier-generic.c
7+
../level_multiplier-hifi3.c
78
../level_multiplier-ipc4.c
89
LIB openmodules
910
)

0 commit comments

Comments
 (0)