Skip to content

Commit e68223a

Browse files
committed
Math: Optimizations to CORDIC trigonometric functions
This patch optimizes the cycles count performance for the math functions. The computational accuracy is not impacted. In function cordic_approx() in trig.c, the most performance critical function, the if statements are simplified and macro values used instead of global constants. The first if statement is simplified with use of absolute value of the parameter th_rad_fxp and using new direction variable from sign of the parameter. The b_idx iteration loop is simplified with direction sign variable and new variable for common shift amount. The local variables in the b_idx iteration speed up the algorithm. In is_scalar_cordic_acos() and is_scalar_cordic_acos() functions the numiters parameter is replaced by numiters_minus_one to avoid the subtract by one. The cosvalue shift is avoided by changing the constant to compare to as 2x. The variable k is eliminated from the b_i iteration since it is duplicate of variable b_i. The constant variables to macros names have been adjusted for the actual Q-format computed the code. E.g. PI/2 Q29 fraction is same integer value as PI Q28 fraction. In header file trig.h the PI related constants are adjusted to Q-format used in the code. The equation to calculate the value in Octave is shown in comment. The new macros for table size -1 iteration counts are added and the inline functions are changed to use it. The int16_t parameters for functions are replaced with int for better code speed. These changes save in MTL platform 46 MCPS in stft_process component with 1024 size FFT and 256 hop in converting FFTs to (magnitude,phase) phase format and back complex FFT. Signed-off-by: Seppo Ingalsuo <seppo.ingalsuo@linux.intel.com>
1 parent cedef9b commit e68223a

File tree

2 files changed

+81
-111
lines changed

2 files changed

+81
-111
lines changed

src/include/sof/math/trig.h

Lines changed: 22 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,17 @@
1313

1414
#include <stdint.h>
1515

16-
#define PI_DIV2_Q4_28 421657428
17-
#define PI_DIV2_Q3_29 843314856
18-
#define PI_Q4_28 843314857
19-
#define PI_MUL2_Q4_28 1686629713
20-
#define CORDIC_31B_TABLE_SIZE 31
21-
#define CORDIC_15B_TABLE_SIZE 15
22-
#define CORDIC_30B_ITABLE_SIZE 30
23-
#define CORDIC_16B_ITABLE_SIZE 16
16+
#define PI_Q4_28 843314857 /* int32(pi * 2^28) */
17+
#define PI_MUL2_Q4_28 1686629713 /* int32(2 * pi * 2^28) */
18+
#define PI_DIV2_Q3_29 843314857 /* int32(pi / 2 * 2^29) */
19+
#define PI_Q3_29 1686629713 /* int32(pi * 2^29) */
20+
21+
#define CORDIC_31B_TABLE_SIZE 31
22+
#define CORDIC_15B_TABLE_SIZE 15
23+
#define CORDIC_30B_ITABLE_SIZE 30
24+
#define CORDIC_16B_ITABLE_SIZE 16
25+
#define CORDIC_31B_ITERS_MINUS_ONE (CORDIC_31B_TABLE_SIZE - 1)
26+
#define CORDIC_16B_ITERS_MINUS_ONE (CORDIC_16B_ITABLE_SIZE - 1)
2427

2528
typedef enum {
2629
EN_32B_CORDIC_SINE,
@@ -38,8 +41,9 @@ struct cordic_cmpx {
3841

3942
void cordic_approx(int32_t th_rad_fxp, int32_t a_idx, int32_t *sign, int32_t *b_yn, int32_t *xn,
4043
int32_t *th_cdc_fxp);
41-
int32_t is_scalar_cordic_acos(int32_t realvalue, int16_t numiters);
42-
int32_t is_scalar_cordic_asin(int32_t realvalue, int16_t numiters);
44+
int32_t is_scalar_cordic_acos(int32_t realvalue, int numiters_minus_one);
45+
int32_t is_scalar_cordic_asin(int32_t realvalue, int numiters_minus_one);
46+
4347
void cmpx_cexp(int32_t sign, int32_t b_yn, int32_t xn, cordic_cfg type, struct cordic_cmpx *cexp);
4448
/* Input is Q4.28, output is Q1.31 */
4549
/**
@@ -238,11 +242,9 @@ static inline int32_t asin_fixed_32b(int32_t cdc_asin_th)
238242
int32_t th_asin_fxp;
239243

240244
if (cdc_asin_th >= 0)
241-
th_asin_fxp = is_scalar_cordic_asin(cdc_asin_th,
242-
CORDIC_31B_TABLE_SIZE);
245+
th_asin_fxp = is_scalar_cordic_asin(cdc_asin_th, CORDIC_31B_ITERS_MINUS_ONE);
243246
else
244-
th_asin_fxp = -is_scalar_cordic_asin(-cdc_asin_th,
245-
CORDIC_31B_TABLE_SIZE);
247+
th_asin_fxp = -is_scalar_cordic_asin(-cdc_asin_th, CORDIC_31B_ITERS_MINUS_ONE);
246248

247249
return th_asin_fxp; /* Q2.30 */
248250
}
@@ -262,12 +264,10 @@ static inline int32_t acos_fixed_32b(int32_t cdc_acos_th)
262264
int32_t th_acos_fxp;
263265

264266
if (cdc_acos_th >= 0)
265-
th_acos_fxp = is_scalar_cordic_acos(cdc_acos_th,
266-
CORDIC_31B_TABLE_SIZE);
267+
th_acos_fxp = is_scalar_cordic_acos(cdc_acos_th, CORDIC_31B_ITERS_MINUS_ONE);
267268
else
268269
th_acos_fxp =
269-
PI_MUL2_Q4_28 - is_scalar_cordic_acos(-cdc_acos_th,
270-
CORDIC_31B_TABLE_SIZE);
270+
PI_Q3_29 - is_scalar_cordic_acos(-cdc_acos_th, CORDIC_31B_ITERS_MINUS_ONE);
271271

272272
return th_acos_fxp; /* Q3.29 */
273273
}
@@ -289,11 +289,9 @@ static inline int16_t asin_fixed_16b(int32_t cdc_asin_th)
289289
int32_t th_asin_fxp;
290290

291291
if (cdc_asin_th >= 0)
292-
th_asin_fxp = is_scalar_cordic_asin(cdc_asin_th,
293-
CORDIC_16B_ITABLE_SIZE);
292+
th_asin_fxp = is_scalar_cordic_asin(cdc_asin_th, CORDIC_16B_ITERS_MINUS_ONE);
294293
else
295-
th_asin_fxp = -is_scalar_cordic_asin(-cdc_asin_th,
296-
CORDIC_16B_ITABLE_SIZE);
294+
th_asin_fxp = -is_scalar_cordic_asin(-cdc_asin_th, CORDIC_16B_ITERS_MINUS_ONE);
297295
/*convert Q2.30 to Q2.14 format*/
298296
return sat_int16(Q_SHIFT_RND(th_asin_fxp, 30, 14));
299297
}
@@ -314,12 +312,10 @@ static inline int16_t acos_fixed_16b(int32_t cdc_acos_th)
314312
int32_t th_acos_fxp;
315313

316314
if (cdc_acos_th >= 0)
317-
th_acos_fxp = is_scalar_cordic_acos(cdc_acos_th,
318-
CORDIC_16B_ITABLE_SIZE);
315+
th_acos_fxp = is_scalar_cordic_acos(cdc_acos_th, CORDIC_16B_ITERS_MINUS_ONE);
319316
else
320317
th_acos_fxp =
321-
PI_MUL2_Q4_28 - is_scalar_cordic_acos(-cdc_acos_th,
322-
CORDIC_16B_ITABLE_SIZE);
318+
PI_Q3_29 - is_scalar_cordic_acos(-cdc_acos_th, CORDIC_16B_ITERS_MINUS_ONE);
323319

324320
/*convert Q3.29 to Q3.13 format*/
325321
return sat_int16(Q_SHIFT_RND(th_acos_fxp, 29, 13));

src/math/trig.c

Lines changed: 59 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -13,18 +13,13 @@
1313
#include <sof/math/cordic.h>
1414
#include <stdint.h>
1515

16-
/* Use a local definition to avoid adding a dependency on <math.h> */
17-
#define _M_PI 3.14159265358979323846 /* pi */
16+
#define CORDIC_SINE_COS_LUT_Q29 652032874 /* deg = 69.586061, int32(1.214505869895220 * 2^29) */
17+
18+
#define CORDIC_SINCOS_PIOVERTWO_Q28 421657428 /* int32(pi / 2 * 2^28) */
19+
#define CORDIC_SINCOS_PI_Q28 843314857 /* int32(pi * 2^28) */
20+
#define CORDIC_SINCOS_TWOPI_Q28 1686629713 /* int32(2 * pi * 2^28) */
21+
#define CORDIC_SINCOS_ONEANDHALFPI_Q28 1264972285 /* int32(1.5 * pi * 2^28) */
1822

19-
/* 652032874 , deg = 69.586061*/
20-
const int32_t cordic_sine_cos_lut_q29fl = Q_CONVERT_FLOAT(1.214505869895220, 29);
21-
/* 1686629713, deg = 90.000000 */
22-
const int32_t cordic_sine_cos_piovertwo_q30fl = Q_CONVERT_FLOAT(_M_PI / 2, 30);
23-
/* 421657428 , deg = 90.000000 */
24-
const int32_t cord_sincos_piovertwo_q28fl = Q_CONVERT_FLOAT(_M_PI / 2, 28);
25-
/* 843314857, deg = 90.000000 */
26-
const int32_t cord_sincos_piovertwo_q29fl = Q_CONVERT_FLOAT(_M_PI / 2, 29);
27-
/* arc trignometry constant*/
2823
/**
2924
* CORDIC-based approximation of sine and cosine
3025
* \+----------+----------------------------------------+--------------------+-------------------+
@@ -36,20 +31,25 @@ const int32_t cord_sincos_piovertwo_q29fl = Q_CONVERT_FLOAT(_M_PI / 2, 29);
3631
* \|1686629713| Q_CONVERT_FLOAT(1.5707963267341256, 30)| 89.9999999965181| 1.57079632673413 |
3732
* \+----------+----------------------------------------+--------------------+-------------------+
3833
*/
39-
/* 379625062, deg = 81.0284683480568475 or round(1.4142135605216026*2^28) */
40-
const int32_t cord_arcsincos_q28fl = Q_CONVERT_FLOAT(1.4142135605216026 / 2, 28);
41-
/* 1073741824, deg = 57.2957795130823229 or round(1*2^30)*/
42-
const int32_t cord_arcsincos_q30fl = Q_CONVERT_FLOAT(1.0000000000000000, 30);
34+
35+
#define CORDIC_ARCSINCOS_SQRT2_DIV4_Q30 379625062 /* int32(sqrt(2) / 4 * 2^30) */
36+
#define CORDIC_ARCSINCOS_ONE_Q30 1073741824 /* int32(1 * 2^30) */
37+
4338
/**
4439
* CORDIC-based approximation of sine, cosine and complex exponential
4540
*/
4641
void cordic_approx(int32_t th_rad_fxp, int32_t a_idx, int32_t *sign, int32_t *b_yn, int32_t *xn,
4742
int32_t *th_cdc_fxp)
4843
{
44+
int32_t direction;
45+
int32_t abs_th;
4946
int32_t b_idx;
50-
int32_t xtmp;
51-
int32_t ytmp;
52-
*sign = 1;
47+
int32_t xn_local = CORDIC_SINE_COS_LUT_Q29;
48+
int32_t yn_local = 0;
49+
int32_t xtmp = CORDIC_SINE_COS_LUT_Q29;
50+
int32_t ytmp = 0;
51+
int shift;
52+
5353
/* Addition or subtraction by a multiple of pi/2 is done in the data type
5454
* of the input. When the fraction length is 29, then the quantization error
5555
* introduced by the addition or subtraction of pi/2 is done with 29 bits of
@@ -58,46 +58,37 @@ void cordic_approx(int32_t th_rad_fxp, int32_t a_idx, int32_t *sign, int32_t *b_
5858
* without overflow.Increase of fractionLength makes the addition or
5959
* subtraction of a multiple of pi/2 more precise
6060
*/
61-
if (th_rad_fxp > cord_sincos_piovertwo_q28fl) {
62-
if ((th_rad_fxp - cord_sincos_piovertwo_q29fl) <= cord_sincos_piovertwo_q28fl) {
63-
th_rad_fxp -= cord_sincos_piovertwo_q29fl;
64-
*sign = -1;
65-
} else {
66-
th_rad_fxp -= cordic_sine_cos_piovertwo_q30fl;
67-
}
68-
} else if (th_rad_fxp < -cord_sincos_piovertwo_q28fl) {
69-
if ((th_rad_fxp + cord_sincos_piovertwo_q29fl) >= -cord_sincos_piovertwo_q28fl) {
70-
th_rad_fxp += cord_sincos_piovertwo_q29fl;
71-
*sign = -1;
61+
abs_th = (th_rad_fxp >= 0) ? th_rad_fxp : -th_rad_fxp;
62+
direction = (th_rad_fxp >= 0) ? 1 : -1;
63+
*sign = 1;
64+
if (abs_th > CORDIC_SINCOS_PIOVERTWO_Q28) {
65+
if (abs_th <= CORDIC_SINCOS_ONEANDHALFPI_Q28) {
66+
th_rad_fxp -= direction * CORDIC_SINCOS_PI_Q28;
67+
*sign = -1;
7268
} else {
73-
th_rad_fxp += cordic_sine_cos_piovertwo_q30fl;
69+
th_rad_fxp -= direction * CORDIC_SINCOS_TWOPI_Q28;
7470
}
7571
}
7672

7773
th_rad_fxp <<= 2;
78-
*b_yn = 0;
79-
*xn = cordic_sine_cos_lut_q29fl;
80-
xtmp = cordic_sine_cos_lut_q29fl;
81-
ytmp = 0;
8274

8375
/* Calculate the correct coefficient values from rotation angle.
8476
* Find difference between the coefficients from the lookup table
8577
* and those from the calculation
8678
*/
8779
for (b_idx = 0; b_idx < a_idx; b_idx++) {
88-
if (th_rad_fxp < 0) {
89-
th_rad_fxp += cordic_lookup[b_idx];
90-
*xn += ytmp;
91-
*b_yn -= xtmp;
92-
} else {
93-
th_rad_fxp -= cordic_lookup[b_idx];
94-
*xn -= ytmp;
95-
*b_yn += xtmp;
96-
}
97-
xtmp = *xn >> (b_idx + 1);
98-
ytmp = *b_yn >> (b_idx + 1);
80+
direction = (th_rad_fxp >= 0) ? 1 : -1;
81+
shift = b_idx + 1;
82+
th_rad_fxp -= direction * cordic_lookup[b_idx];
83+
xn_local -= direction * ytmp;
84+
yn_local += direction * xtmp;
85+
xtmp = xn_local >> shift;
86+
ytmp = yn_local >> shift;
9987
}
100-
/* Q2.30 format -sine, cosine*/
88+
89+
/* Write back results once */
90+
*xn = xn_local;
91+
*b_yn = yn_local;
10192
*th_cdc_fxp = th_rad_fxp;
10293
}
10394
EXPORT_SYMBOL(cordic_approx);
@@ -108,7 +99,7 @@ EXPORT_SYMBOL(cordic_approx);
10899
* int16_t numiters
109100
* Return Type : int32_t
110101
*/
111-
int32_t is_scalar_cordic_acos(int32_t cosvalue, int16_t numiters)
102+
int32_t is_scalar_cordic_acos(int32_t cosvalue, int numiters_minus_one)
112103
{
113104
int32_t xdshift;
114105
int32_t ydshift;
@@ -118,46 +109,37 @@ int32_t is_scalar_cordic_acos(int32_t cosvalue, int16_t numiters)
118109
int32_t y = 0;
119110
int32_t z = 0;
120111
int32_t sign;
121-
int32_t b_i;
122-
int i;
112+
int b_i;
123113
int j;
124-
int k;
125114

126115
/* Initialize the variables for the cordic iteration
127116
* angles less than pi/4, we initialize (x,y) along the x-axis.
128117
* angles greater than or equal to pi/4, we initialize (x,y)
129118
* along the y-axis. This improves the accuracy of the algorithm
130119
* near the edge of the domain of convergence
120+
*
121+
* Note: not pi/4 but sqrt(2)/4 is used as the threshold
131122
*/
132-
if ((cosvalue >> 1) < cord_arcsincos_q28fl) {
133-
x = 0;
134-
y = cord_arcsincos_q30fl;
123+
if (cosvalue < CORDIC_ARCSINCOS_SQRT2_DIV4_Q30) {
124+
y = CORDIC_ARCSINCOS_ONE_Q30;
135125
z = PI_DIV2_Q3_29;
136126
} else {
137-
x = cord_arcsincos_q30fl;
138-
y = 0;
139-
z = 0;
127+
x = CORDIC_ARCSINCOS_ONE_Q30;
140128
}
141129

142130
/* DCORDIC(Double CORDIC) algorithm */
143131
/* Double iterations method consists in the fact that unlike the classical */
144132
/* CORDIC method,where the iteration step value changes EVERY time, i.e. on */
145133
/* each iteration, in the double iteration method, the iteration step value */
146134
/* is repeated twice and changes only through one iteration */
147-
i = numiters - 1;
148-
for (b_i = 0; b_i < i; b_i++) {
135+
for (b_i = 0; b_i < numiters_minus_one; b_i++) {
149136
j = (b_i + 1) << 1;
150137
if (j >= 31)
151138
j = 31;
152139

153-
if (b_i < 31)
154-
k = b_i;
155-
else
156-
k = 31;
157-
158-
xshift = x >> k;
140+
xshift = x >> b_i;
141+
yshift = y >> b_i;
159142
xdshift = x >> j;
160-
yshift = y >> k;
161143
ydshift = y >> j;
162144
/* Do nothing if x currently equals the target value. Allowed for
163145
* double rotations algorithms, as it is equivalent to rotating by
@@ -188,7 +170,7 @@ int32_t is_scalar_cordic_acos(int32_t cosvalue, int16_t numiters)
188170
* int16_t numiters
189171
* Return Type : int32_t
190172
*/
191-
int32_t is_scalar_cordic_asin(int32_t sinvalue, int16_t numiters)
173+
int32_t is_scalar_cordic_asin(int32_t sinvalue, int numiters_minus_one)
192174
{
193175
int32_t xdshift;
194176
int32_t ydshift;
@@ -198,47 +180,39 @@ int32_t is_scalar_cordic_asin(int32_t sinvalue, int16_t numiters)
198180
int32_t y = 0;
199181
int32_t z = 0;
200182
int32_t sign;
201-
int32_t b_i;
202-
int i;
183+
int b_i;
203184
int j;
204-
int k;
205185

206186
/* Initialize the variables for the cordic iteration
207187
* angles less than pi/4, we initialize (x,y) along the x-axis.
208188
* angles greater than or equal to pi/4, we initialize (x,y)
209189
* along the y-axis. This improves the accuracy of the algorithm
210190
* near the edge of the domain of convergence
191+
*
192+
* Note: Instead of pi/4, sqrt(2)/4 is used as the threshold
211193
*/
212-
if ((sinvalue >> 1) > cord_arcsincos_q28fl) {
213-
x = 0;
214-
y = cord_arcsincos_q30fl;
194+
if (sinvalue > CORDIC_ARCSINCOS_SQRT2_DIV4_Q30) {
195+
y = CORDIC_ARCSINCOS_ONE_Q30;
215196
z = PI_DIV2_Q3_29;
216197
} else {
217-
x = cord_arcsincos_q30fl;
218-
y = 0;
219-
z = 0;
198+
x = CORDIC_ARCSINCOS_ONE_Q30;
220199
}
221200

222201
/* DCORDIC(Double CORDIC) algorithm */
223202
/* Double iterations method consists in the fact that unlike the classical */
224203
/* CORDIC method,where the iteration step value changes EVERY time, i.e. on */
225204
/* each iteration, in the double iteration method, the iteration step value */
226205
/* is repeated twice and changes only through one iteration */
227-
i = numiters - 1;
228-
for (b_i = 0; b_i < i; b_i++) {
206+
// i = numiters - 1;
207+
for (b_i = 0; b_i < numiters_minus_one; b_i++) {
229208
j = (b_i + 1) << 1;
230209
if (j >= 31)
231210
j = 31;
232211

233-
if (b_i < 31)
234-
k = b_i;
235-
else
236-
k = 31;
237-
238-
xshift = x >> k;
239-
xdshift = x >> j;
240-
yshift = y >> k;
212+
xshift = x >> b_i;
213+
yshift = y >> b_i;
241214
ydshift = y >> j;
215+
xdshift = x >> j;
242216
/* Do nothing if x currently equals the target value. Allowed for
243217
* double rotations algorithms, as it is equivalent to rotating by
244218
* the same angle in opposite directions sequentially. Accounts for

0 commit comments

Comments
 (0)