Skip to content

Commit badf4c0

Browse files
authored
Merge pull request #5592 from RajalakshmiSR/sgemm-p10-unroll
POWER10: Reduce sgemm loop unrolling
2 parents e4344de + 2283fcb commit badf4c0

File tree

1 file changed

+1
-111
lines changed

1 file changed

+1
-111
lines changed

kernel/power/sgemm_kernel_power10.c

Lines changed: 1 addition & 111 deletions
Original file line number | Diff line number | Diff line change
@@ -245,118 +245,8 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
245 245
AO += 16;
246 246
BO += 8;
247 247
temp--;
248-
BLASLONG K = temp / 64;
248+
BLASLONG K = temp / 16;
249 249
for (l = 0; l < K; l++)
250-
{
251-
vec_t *rowA = (vec_t *) & AO[0];
252-
vec_t *rowB = (vec_t *) & BO[0];
253-
KERNEL (0, 0);
254-
KERNEL (2, 4);
255-
KERNEL (4, 8);
256-
KERNEL (6, 12);
257-
KERNEL (8, 16);
258-
KERNEL (10, 20);
259-
KERNEL (12, 24);
260-
KERNEL (14, 28);
261-
KERNEL (16, 32);
262-
KERNEL (18, 36);
263-
KERNEL (20, 40);
264-
KERNEL (22, 44);
265-
KERNEL (24, 48);
266-
KERNEL (26, 52);
267-
KERNEL (28, 56);
268-
KERNEL (30, 60);
269-
KERNEL (32, 64);
270-
KERNEL (34, 68);
271-
KERNEL (36, 72);
272-
KERNEL (38, 76);
273-
KERNEL (40, 80);
274-
KERNEL (42, 84);
275-
KERNEL (44, 88);
276-
KERNEL (46, 92);
277-
KERNEL (48, 96);
278-
KERNEL (50, 100);
279-
KERNEL (52, 104);
280-
KERNEL (54, 108);
281-
KERNEL (56, 112);
282-
KERNEL (58, 116);
283-
KERNEL (60, 120);
284-
KERNEL (62, 124);
285-
KERNEL (64, 128);
286-
KERNEL (66, 132);
287-
KERNEL (68, 136);
288-
KERNEL (70, 140);
289-
KERNEL (72, 144);
290-
KERNEL (74, 148);
291-
KERNEL (76, 152);
292-
KERNEL (78, 156);
293-
KERNEL (80, 160);
294-
KERNEL (82, 164);
295-
KERNEL (84, 168);
296-
KERNEL (86, 172);
297-
KERNEL (88, 176);
298-
KERNEL (90, 180);
299-
KERNEL (92, 184);
300-
KERNEL (94, 188);
301-
KERNEL (96, 192);
302-
KERNEL (98, 196);
303-
KERNEL (100, 200);
304-
KERNEL (102, 204);
305-
KERNEL (104, 208);
306-
KERNEL (106, 212);
307-
KERNEL (108, 216);
308-
KERNEL (110, 220);
309-
KERNEL (112, 224);
310-
KERNEL (114, 228);
311-
KERNEL (116, 232);
312-
KERNEL (118, 236);
313-
KERNEL (120, 240);
314-
KERNEL (122, 244);
315-
KERNEL (124, 248);
316-
KERNEL (126, 252);
317-
AO += 1024;
318-
BO += 512;
319-
}
320-
if ((temp & 63) >> 5)
321-
{
322-
vec_t *rowA = (vec_t *) & AO[0];
323-
vec_t *rowB = (vec_t *) & BO[0];
324-
KERNEL (0, 0);
325-
KERNEL (2, 4);
326-
KERNEL (4, 8);
327-
KERNEL (6, 12);
328-
KERNEL (8, 16);
329-
KERNEL (10, 20);
330-
KERNEL (12, 24);
331-
KERNEL (14, 28);
332-
KERNEL (16, 32);
333-
KERNEL (18, 36);
334-
KERNEL (20, 40);
335-
KERNEL (22, 44);
336-
KERNEL (24, 48);
337-
KERNEL (26, 52);
338-
KERNEL (28, 56);
339-
KERNEL (30, 60);
340-
KERNEL (32, 64);
341-
KERNEL (34, 68);
342-
KERNEL (36, 72);
343-
KERNEL (38, 76);
344-
KERNEL (40, 80);
345-
KERNEL (42, 84);
346-
KERNEL (44, 88);
347-
KERNEL (46, 92);
348-
KERNEL (48, 96);
349-
KERNEL (50, 100);
350-
KERNEL (52, 104);
351-
KERNEL (54, 108);
352-
KERNEL (56, 112);
353-
KERNEL (58, 116);
354-
KERNEL (60, 120);
355-
KERNEL (62, 124);
356-
AO += 512;
357-
BO += 256;
358-
}
359-
if ((temp & 31) >> 4)
360 250
{
361 251
vec_t *rowA = (vec_t *) & AO[0];
362 252
vec_t *rowB = (vec_t *) & BO[0];

0 commit comments

Comments
 (0)