From 070aaaf1f76afb6b88132a173106d28d2845116c Mon Sep 17 00:00:00 2001 From: jodavies Date: Thu, 26 Feb 2026 11:37:11 +0000 Subject: [PATCH] perf: allocate Normalize buffers dynamically, store in AT Large stack allocations have a performance implication due to stack clash protection. Dynamically allocate the buffers needed by Normalize in AT, instead. This leads to a large performance improvement. --- sources/declare.h | 1 + sources/normal.c | 80 +++++++++++++++++++++++++++++++++-------------- sources/setfile.c | 26 +++++++++++++++ sources/startup.c | 5 +++ sources/structs.h | 23 ++++++++++++++ sources/threads.c | 6 ++++ 6 files changed, 118 insertions(+), 23 deletions(-) diff --git a/sources/declare.h b/sources/declare.h index 802e171e..b8493ef4 100644 --- a/sources/declare.h +++ b/sources/declare.h @@ -1300,6 +1300,7 @@ extern void SubsInAll(PHEAD0); extern void TransferBuffer(int,int,int); extern int TakeIDfunction(PHEAD WORD *); extern int MakeSetupAllocs(void); +extern NORMDATA *AllocNormData(void); extern int TryFileSetups(void); extern void ExchangeExpressions(int,int); extern void ExchangeDollars(int,int); diff --git a/sources/normal.c b/sources/normal.c index 7e5254ff..93d6e4c0 100644 --- a/sources/normal.c +++ b/sources/normal.c @@ -1,11 +1,12 @@ /** @file normal.c * - * Mainly the routine Normalize. This routine brings terms to standard - * FORM. Currently it has one serious drawback. Its buffers are all - * in the stack. This means these buffers have a fixed size (NORMSIZE). - * In the past this has caused problems and NORMSIZE had to be increased. + * Mainly the routine Normalize. This routine brings terms to standard + * form. Its main buffers are in AT, but they have a fixed size controlled + * by NORMSIZE, which limit the maximum complexity of terms which can be + * normalized. * - * It is not clear whether Normalize can be called recursively. + * Normalize is called recursively, currently via: + * Normalize -> ExpandRat -> Normalize. */ /* #[ License : */ /* @@ -185,8 +186,7 @@ int Commute(WORD *fleft, WORD *fright) This is the big normalization routine. It has a great need to be economical. - There is a fixed limit to the number of objects coming in. - Something should be done about it. + The limit on the number of objects coming in is given by NORMSIZE. */ @@ -199,20 +199,41 @@ int Normalize(PHEAD WORD *term) WORD *t, *m, *r, i, j, k, l, nsym, *ss, *tt, *u; WORD shortnum, stype; WORD *stop, *to = 0, *from = 0; -/* - The next variables could be better off in the AT.WorkSpace (?) - Now they make stackallocations rather bothersome. -*/ - WORD psym[7*NORMSIZE],*ppsym; - WORD pvec[NORMSIZE],*ppvec,nvec; - WORD pdot[3*NORMSIZE],*ppdot,ndot; - WORD pdel[2*NORMSIZE],*ppdel,ndel; - WORD pind[NORMSIZE],nind; - WORD *peps[NORMSIZE/3],neps; - WORD *pden[NORMSIZE/3],nden; - WORD *pcom[NORMSIZE],ncom; - WORD *pnco[NORMSIZE],nnco; - WORD *pcon[2*NORMSIZE],ncon; /* Pointer to contractable indices */ + + WORD *ppsym, *ppvec, *ppdot, *ppdel; + WORD nvec, ndot, ndel, nind, neps, nden, ncom, nnco, ncon; + + AT.NormDepth++; +#ifdef DEBUGGING + if ( AT.NormDepth > 2 ) { + // We don't expect this to happen in the current codebase. + Terminate(-1); + } +#endif + if ( AT.NormDepth > AT.NormDataSize ) { + NORMDATA **top = AT.NormData + AT.NormDataSize; + DoubleBuffer((void **)&(AT.NormData), (void **)&(top), + sizeof(*AT.NormData), "double NormData pointers"); + AT.NormDataSize *= 2; + for ( LONG i = AT.NormDepth-1; i < AT.NormDataSize; i++ ) { + AT.NormData[i] = NULL; + } + } + if ( AT.NormData[AT.NormDepth-1] == NULL ) { + AT.NormData[AT.NormDepth-1] = AllocNormData(); + } + + WORD *psym = AT.NormData[AT.NormDepth-1]->psym; + WORD *pvec = AT.NormData[AT.NormDepth-1]->pvec; + WORD *pdot = AT.NormData[AT.NormDepth-1]->pdot; + WORD *pdel = AT.NormData[AT.NormDepth-1]->pdel; + WORD *pind = AT.NormData[AT.NormDepth-1]->pind; + WORD **peps = AT.NormData[AT.NormDepth-1]->peps; + WORD **pden = AT.NormData[AT.NormDepth-1]->pden; + WORD **pcom = AT.NormData[AT.NormDepth-1]->pcom; + WORD **pnco = AT.NormData[AT.NormDepth-1]->pnco; + WORD **pcon = AT.NormData[AT.NormDepth-1]->pcon; + WORD *n_coef, ncoef; /* Accumulator for the coefficient */ WORD *n_llnum, *lnum, nnum; WORD *termout, oldtoprhs = 0, subtype; @@ -243,7 +264,12 @@ PrintTerm(term,"Normalize"); didcontr = 0; ReplaceType = -1; t = term; - if ( !*t ) { TermFree(n_coef,"NormCoef"); TermFree(n_llnum,"n_llnum"); return(regval); } + if ( !*t ) { + AT.NormDepth--; + TermFree(n_coef,"NormCoef"); + TermFree(n_llnum,"n_llnum"); + return(regval); + } r = t + *t; ncoef = r[-1]; i = ABS(ncoef); @@ -4134,6 +4160,7 @@ regularratfun:; C->numrhs = oldtoprhs; C->Pointer = C->Buffer + oldcpointer; */ + AT.NormDepth--; TermFree(n_llnum,"n_llnum"); TermFree(n_coef,"NormCoef"); return(1); @@ -4161,6 +4188,7 @@ regularratfun:; TermAssign(term); } */ + AT.NormDepth--; TermFree(n_llnum,"n_llnum"); TermFree(n_coef,"NormCoef"); return(regval); @@ -4186,11 +4214,13 @@ regularratfun:; NormZero: *term = 0; AT.WorkPointer = termout; + AT.NormDepth--; TermFree(n_llnum,"n_llnum"); TermFree(n_coef,"NormCoef"); return(regval); NormMin: + AT.NormDepth--; TermFree(n_llnum,"n_llnum"); TermFree(n_coef,"NormCoef"); return(-1); @@ -4199,6 +4229,7 @@ regularratfun:; MLOCK(ErrorMessageLock); MesCall("Norm"); MUNLOCK(ErrorMessageLock); + AT.NormDepth--; TermFree(n_llnum,"n_llnum"); TermFree(n_coef,"NormCoef"); return(-1); @@ -5164,7 +5195,10 @@ void DropSymbols(PHEAD WORD *term) int SymbolNormalize(WORD *term) { GETIDENTITY - WORD buffer[7*NORMSIZE], *t, *b, *bb, *tt, *m, *tstop; + WORD *t, *b, *bb, *tt, *m, *tstop; + // Here we use a stack-allocated array, since things are much smaller + // compared to the full Normalize routine. + WORD buffer[7*NORMSIZE]; int i; b = buffer; *b++ = SYMBOL; *b++ = 2; diff --git a/sources/setfile.c b/sources/setfile.c index a61cde24..17f028a3 100644 --- a/sources/setfile.c +++ b/sources/setfile.c @@ -1116,6 +1116,32 @@ int MakeSetupAllocs(void) /* #] MakeSetupAllocs : + #[ AllocNormData : + + Allocate the arrays within the NORMDATA struct. These are dynamic, + such that valgrind can detect buffer overruns in these arrays. +*/ + +NORMDATA* AllocNormData(void) +{ + NORMDATA* tmp = Malloc1(sizeof(NORMDATA), "NormData struct"); + + tmp->psym = Malloc1(7*NORMSIZE*sizeof(*(tmp->psym)), "Normalize struct psym"); + tmp->pvec = Malloc1(1*NORMSIZE*sizeof(*(tmp->pvec)), "Normalize struct pvec"); + tmp->pdot = Malloc1(3*NORMSIZE*sizeof(*(tmp->pdot)), "Normalize struct pdot"); + tmp->pdel = Malloc1(2*NORMSIZE*sizeof(*(tmp->pdel)), "Normalize struct pdel"); + tmp->pind = Malloc1(1*NORMSIZE*sizeof(*(tmp->pind)), "Normalize struct pind"); + tmp->peps = Malloc1(NORMSIZE/3*sizeof(*(tmp->peps)), "Normalize struct peps"); + tmp->pden = Malloc1(NORMSIZE/3*sizeof(*(tmp->pden)), "Normalize struct pden"); + tmp->pcom = Malloc1(1*NORMSIZE*sizeof(*(tmp->pcom)), "Normalize struct pcom"); + tmp->pnco = Malloc1(1*NORMSIZE*sizeof(*(tmp->pnco)), "Normalize struct pnco"); + tmp->pcon = Malloc1(2*NORMSIZE*sizeof(*(tmp->pcon)), "Normalize struct pcon"); + + return tmp; +} + +/* + #] AllocNormData : #[ TryFileSetups : Routine looks in the input file for a start of the type diff --git a/sources/startup.c b/sources/startup.c index e724906a..4f7adcde 100644 --- a/sources/startup.c +++ b/sources/startup.c @@ -1232,6 +1232,11 @@ void StartVariables(void) AR.wranfnpair1 = NPAIR1; AR.wranfnpair2 = NPAIR2; AR.wranfseed = 0; + + AT.NormData = Malloc1(sizeof(*(AT.NormData)), "NormData pointers"); + AT.NormDataSize = 1; + AT.NormData[0] = AllocNormData(); + AT.NormDepth = 0; #endif AM.atstartup = 1; AM.oldnumextrasymbols = strDup1((UBYTE *)"OLDNUMEXTRASYMBOLS_","oldnumextrasymbols"); diff --git a/sources/structs.h b/sources/structs.h index 651a6ddf..c4c64897 100644 --- a/sources/structs.h +++ b/sources/structs.h @@ -1373,6 +1373,26 @@ typedef struct { WORD flags; } TERMINFO; +/* + Struct to hold temporary data in Normalize, so that it is not allocated + on the stack each function call. The sizes of these arrays introduces a + maximum complexity of terms which can be normalized. Each array is + allocated dynamically, by AllocNormData, such that valgrind can detect + errors if they are over-run. +*/ +typedef struct NoRmDaTa { + WORD *psym; + WORD *pvec; + WORD *pdot; + WORD *pdel; + WORD *pind; + WORD **peps; + WORD **pden; + WORD **pcom; + WORD **pnco; + WORD **pcon; +} NORMDATA; + /* #] Varia : #[ A : @@ -2081,6 +2101,9 @@ struct T_const { void *aux_; void *auxr_; #endif + NORMDATA **NormData; + LONG NormDataSize; + LONG NormDepth; PARTI partitions; LONG sBer; /* (T) Size of the Bernoullis buffer */ LONG pWorkPointer; /* (R) Offset-pointer in pWorkSpace */ diff --git a/sources/threads.c b/sources/threads.c index 9176e4c6..73fa5de1 100644 --- a/sources/threads.c +++ b/sources/threads.c @@ -737,6 +737,12 @@ ALLPRIVATES *InitializeOneThread(int identity) AR.wranfnpair1 = NPAIR1; AR.wranfnpair2 = NPAIR2; AR.wranfseed = 0; + + AT.NormData = Malloc1(sizeof(*(AT.NormData)), "NormData thread pointers"); + AT.NormDataSize = 1; + AT.NormData[0] = AllocNormData(); + AT.NormDepth = 0; + AN.SplitScratch = 0; AN.SplitScratchSize = AN.InScratch = 0; AN.SplitScratch1 = 0;