Skip to content

Commit 6f7e8a1

Browse files
committed
compiling with opencilk
1 parent 6718fdc commit 6f7e8a1

15 files changed

Lines changed: 113 additions & 65 deletions

Makefile

Lines changed: 41 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -1,55 +1,75 @@
1-
CILK = /opt/intel/composer_xe_2013.5.198/compiler
2-
INCADD = -I$(CILK)/include -I$(CILK)/examples/include
3-
LIBADD = -L$(CILK)/lib/intel64
1+
CILK?=0
2+
NATIVE?=1
3+
OPT?=3
4+
SANITIZE?=0
5+
6+
CFLAGS := -Wall -Wextra -O$(OPT) -g -std=c++20 -gdwarf-4 -fno-exceptions -Wno-unknown-pragmas -Wno-comment
7+
8+
ifeq ($(NATIVE),1)
9+
CFLAGS += -march=native
10+
endif
11+
12+
ifeq ($(CILK),1)
13+
CFLAGS += -fopencilk
14+
endif
15+
16+
ifeq ($(SANITIZE),1)
17+
ifeq ($(CILK),1)
18+
CFLAGS += -fsanitize=cilk,undefined,address -fno-omit-frame-pointer
19+
else
20+
CFLAGS += -fsanitize=undefined,address -fno-omit-frame-pointer
21+
endif
22+
endif
23+
24+
DEFINES := -DCILK=$(CILK)
25+
26+
all: parspmv both_d spmm_dall spmm_a spmm_sall
427

5-
GCCOPT = -O2 -fno-rtti -fno-exceptions # -ftree-vectorize
6-
INTELOPT = -O2 -no-ipo -fno-rtti -fno-exceptions -parallel -restrict -std=c++11 -xAVX -no-prec-div #-fno-inline-functions
7-
DEB = -g -DNOBM -O0 -parallel -restrict -std=c++11
828

929
seqsym: sym_spmv_test.cpp csbsym.cpp csbsym.h utility.h friends.h SSEspmv.o
10-
icpc -cilk-serialize $(INCADD) $(INTELOPT) -o seqsym sym_spmv_test.cpp SSEspmv.o
30+
$(CXX) $(CFLAGS) $(DEFINES) -o seqsym sym_spmv_test.cpp SSEspmv.o
1131

1232
parsym: sym_spmv_test.cpp csbsym.cpp csbsym.h utility.h friends.h SSEspmv.o
13-
icpc $(INCADD) $(DEB) -o parsym sym_spmv_test.cpp SSEspmv.o
33+
$(CXX) $(CFLAGS) $(DEFINES) -o parsym sym_spmv_test.cpp SSEspmv.o
1434

1535
symanal: sym_spmv_test.cpp csbsym.cpp csbsym.h utility.h friends.h SSEspmv.o
16-
icpc -DSTATS $(INCADD) $(INTELOPT) -o symanal sym_spmv_test.cpp SSEspmv.o -lcilkutil
36+
$(CXX) $(CFLAGS) $(DEFINES) -o symanal sym_spmv_test.cpp SSEspmv.o
1737

1838
seqspmv: csb_spmv_test.cpp bicsb.cpp bicsb.h bmcsb.cpp bmcsb.h friends.h utility.h SSEspmv.o
19-
icpc -cilk-serialize $(INCADD) $(INTELOPT) -o seqspmv csb_spmv_test.cpp SSEspmv.o
39+
$(CXX) $(CFLAGS) $(DEFINES) -o seqspmv csb_spmv_test.cpp SSEspmv.o
2040

2141
parspmv: csb_spmv_test.cpp bicsb.cpp bicsb.h bmcsb.cpp bmcsb.h friends.h utility.h SSEspmv.o
22-
icpc $(INCADD) $(INTELOPT) -o parspmv csb_spmv_test.cpp SSEspmv.o
42+
$(CXX) $(CFLAGS) $(DEFINES) -o parspmv csb_spmv_test.cpp SSEspmv.o
2343

2444
parspmv_nobm: csb_spmv_test.cpp bicsb.cpp bicsb.h friends.h utility.h
25-
icpc $(INCADD) $(INTELOPT) -DNOBM -o parspmv_nobm csb_spmv_test.cpp
45+
$(CXX) $(CFLAGS) $(DEFINES) -DNOBM -o parspmv_nobm csb_spmv_test.cpp
2646

2747
parspmvt: csb_spmvt_test.cpp bicsb.cpp bicsb.h utility.h friends.h
28-
icpc $(INCADD) $(INTELOPT) -o parspmvt csb_spmvt_test.cpp
48+
$(CXX) $(CFLAGS) $(DEFINES) -o parspmvt csb_spmvt_test.cpp
2949

3050
both_d: both_test.cpp bicsb.cpp bicsb.h utility.h friends.h
31-
icpc $(INCADD) $(INTELOPT) -o both_d both_test.cpp
51+
$(CXX) $(CFLAGS) $(DEFINES) -o both_d both_test.cpp
3252

3353
both_s: both_test.cpp bicsb.cpp bicsb.h utility.h friends.h
34-
icpc $(INCADD) $(INTELOPT) -DSINGLEPRECISION -o both_s both_test.cpp
54+
$(CXX) $(CFLAGS) $(DEFINES) -DSINGLEPRECISION -o both_s both_test.cpp
3555

3656
spmm_dall: spmm_test.cpp bicsb.cpp bicsb.h utility.h friends.h
3757
for number in 4 8 12 16 24 32 40 48 56 64; do \
38-
echo "icpc $(INCADD) $(INTELOPT) -DRHSDIM=$$number -o spmm_d$$number spmm_test.cpp"; \
39-
icpc $(INCADD) $(INTELOPT) -DRHSDIM=$$number -o spmm_d$$number spmm_test.cpp; \
58+
echo "$(CXX) $(CFLAGS) $(DEFINES) -DRHSDIM=$$number -o spmm_d$$number spmm_test.cpp"; \
59+
$(CXX) $(CFLAGS) $(DEFINES) -DRHSDIM=$$number -o spmm_d$$number spmm_test.cpp; \
4060
done;
4161

4262
spmm_a: spmm_test.cpp bicsb.cpp bicsb.h utility.h friends.h
43-
icpc $(INCADD) $(INTELOPT) -DSINGLEPRECISION -S -fcode-asm -vec_report6 spmm_test.cpp
63+
$(CXX) $(CFLAGS) $(DEFINES) -DSINGLEPRECISION -S -fcode-asm -vec_report6 spmm_test.cpp
4464

4565
spmm_sall: spmm_test.cpp bicsb.cpp bicsb.h utility.h friends.h
4666
for number in 4 8 12 16 24 32 40 48 56 64; do \
47-
echo "icpc $(INCADD) $(INTELOPT) -DSINGLEPRECISION -DRHSDIM=$$number -o spmm_s$$number spmm_test.cpp"; \
48-
icpc $(INCADD) $(INTELOPT) -DSINGLEPRECISION -DRHSDIM=$$number -o spmm_s$$number spmm_test.cpp; \
67+
echo "$(CXX) $(CFLAGS) $(DEFINES) -DSINGLEPRECISION -DRHSDIM=$$number -o spmm_s$$number spmm_test.cpp"; \
68+
$(CXX) $(CFLAGS) $(DEFINES) -DSINGLEPRECISION -DRHSDIM=$$number -o spmm_s$$number spmm_test.cpp; \
4969
done;
5070

5171
SSEspmv.o: SSEspmv.cpp
52-
g++ -DAMD $(GCCOPT) -march=amdfam10 -c SSEspmv.cpp
72+
$(CXX) $(CFLAGS) $(DEFINES) -c SSEspmv.cpp
5373

5474
clean:
5575
rm -f seqspmv

Semirings.h

Lines changed: 8 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66
#include <climits>
77
#include <cmath>
88
#include <tr1/array>
9+
#include <memory>
910
#include "promote.h"
1011

1112
template <typename T>
@@ -60,7 +61,7 @@ struct UnrollerL {
6061
template<int End, int Step>
6162
struct UnrollerL<End, End, Step> {
6263
template<typename Lambda>
63-
static void step(Lambda& func) {
64+
[[maybe_unused]] static void step([[maybe_unused]] Lambda& func) {
6465
// base case is when Begin=End; do nothing
6566
}
6667
};
@@ -75,13 +76,13 @@ struct PTSRArray
7576
// y <- a*x + y overload with a=1
7677
static void axpy(const array<T2, D> & b, array<T_promote, D> & c)
7778
{
79+
// const T2 * __restrict barr = std::assume_aligned<ALIGN>(b.data());
80+
// T_promote * __restrict carr = std::assume_aligned<ALIGN>(c.data());
7881
const T2 * __restrict barr = b.data();
7982
T_promote * __restrict carr = c.data();
80-
__assume_aligned(barr, ALIGN);
81-
__assume_aligned(carr, ALIGN);
8283

8384
#pragma simd
84-
for(int i=0; i<D; ++i)
85+
for(unsigned int i=0; i<D; ++i)
8586
{
8687
carr[i] += barr[i];
8788
}
@@ -92,13 +93,13 @@ struct PTSRArray
9293
// Todo: Do partial unrolling; this code will bloat for D > 32
9394
static void axpy(T1 a, const array<T2,D> & b, array<T_promote,D> & c)
9495
{
96+
// const T2 * __restrict barr = std::assume_aligned<ALIGN>(b.data());
97+
// T_promote * __restrict carr = std::assume_aligned<ALIGN>(c.data());
9598
const T2 * __restrict barr = b.data();
9699
T_promote * __restrict carr = c.data();
97-
__assume_aligned(barr, ALIGN);
98-
__assume_aligned(carr, ALIGN);
99100

100101
#pragma simd
101-
for(int i=0; i<D; ++i)
102+
for(unsigned int i=0; i<D; ++i)
102103
{
103104
carr[i] += a* barr[i];
104105
}

aligned.h

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22
#include <malloc.h>
33
#endif
44
#include <cstdint>
5+
#include <cstdlib>
56
#include <vector>
67
#include <iostream>
78
using namespace std;
@@ -71,7 +72,7 @@ class aligned_allocator
7172
// Returns true if and only if storage allocated from *this
7273
// can be deallocated from other, and vice versa.
7374
// Always returns true for stateless allocators.
74-
bool operator==(const aligned_allocator& other) const
75+
bool operator==([[maybe_unused]] const aligned_allocator& other) const
7576
{
7677
return true;
7778
}
@@ -110,7 +111,7 @@ class aligned_allocator
110111
}
111112

112113
// Mallocator wraps malloc().
113-
void * const pv = _mm_malloc(n * sizeof(T), Alignment);
114+
void * const pv = std::aligned_alloc(Alignment, n * sizeof(T));
114115

115116
// Allocators should throw std::bad_alloc in the case of memory allocation failure.
116117
if (pv == NULL)
@@ -121,9 +122,9 @@ class aligned_allocator
121122
return static_cast<T *>(pv);
122123
}
123124

124-
void deallocate(T * const p, const std::size_t n) const
125+
void deallocate(T * const p, [[maybe_unused]] const std::size_t n) const
125126
{
126-
_mm_free(p);
127+
free(p);
127128
}
128129

129130

bicsb.cpp

Lines changed: 7 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -20,8 +20,8 @@ void BiCsb<NT, IT>::Init(int workers, IT forcelogbeta)
2020
bool sizereq;
2121
if (ispar)
2222
{
23-
sizereq = ((IntPower<2>(rowbits) > SLACKNESS * workers)
24-
&& (IntPower<2>(colbits) > SLACKNESS * workers));
23+
sizereq = ((IntPower<2>(rowbits) > (unsigned int) SLACKNESS * workers)
24+
&& (IntPower<2>(colbits) > (unsigned int) SLACKNESS * workers));
2525
}
2626
else
2727
{
@@ -43,7 +43,7 @@ void BiCsb<NT, IT>::Init(int workers, IT forcelogbeta)
4343
colhighbits = colbits-collowbits; // # higher order bits for cols (has at least one bit)
4444
if(ispar)
4545
{
46-
while(IntPower<2>(rowhighbits) < SLACKNESS * workers)
46+
while(IntPower<2>(rowhighbits) < (unsigned int) SLACKNESS * workers)
4747
{
4848
rowhighbits++;
4949
rowlowbits--;
@@ -869,8 +869,8 @@ void BiCsb<NT, IT>::SubSpMV(IT * __restrict btop, IT bstart, IT bend, const RHS
869869
IT * __restrict r_bot = bot;
870870
NT * __restrict r_num = num;
871871

872-
__m128i lcms = _mm_set1_epi32 (lowcolmask);
873-
__m128i lrms = _mm_set1_epi32 (lowrowmask);
872+
[[maybe_unused]] __m128i lcms = _mm_set1_epi32 (lowcolmask);
873+
[[maybe_unused]] __m128i lrms = _mm_set1_epi32 (lowrowmask);
874874

875875
for (IT j = bstart ; j < bend ; ++j) // for all blocks inside that block row
876876
{
@@ -1350,8 +1350,9 @@ ofstream & BiCsb<NT, IT>::PrintStats(ofstream & outfile) const
13501350
outfile << "## Number of real blocks is "<< ntop << endl;
13511351
outfile << "## Row imbalance is " << RowImbalance(*this) << endl;
13521352
outfile << "## Col imbalance is " << ColImbalance(*this) << endl;
1353+
#ifdef STATS
13531354
outfile << "## Block parallel calls is " << blockparcalls.get_value() << endl;
1354-
1355+
#endif
13551356
std::vector<int> blocksizes(ntop);
13561357
for(IT i=0; i<nbr; ++i)
13571358
{

bmcsb.h

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,7 @@ class BmCsb
3131
ofstream & PrintStats(ofstream & outfile) const;
3232
IT colsize() const { return n;}
3333
IT rowsize() const { return m;}
34+
IT numnonzeros() const { return nz; }
3435
IT numregb() const { return nrb;}
3536
bool isPar() const { return ispar; }
3637

@@ -66,12 +67,12 @@ class BmCsb
6667

6768
IT rowlowbits; // # lower order bits for rows
6869
IT rowhighbits;
69-
IT highrowmask; // mask with the first log(m)/2 bits = 1 and the other bits = 0
70+
IT highrowmask; // mask with the first log(m)/2 bits = 1 and the other bits = 0
7071
IT lowrowmask;
7172

7273
IT collowbits; // # lower order bits for columns
7374
IT colhighbits;
74-
IT highcolmask; // mask with the first log(n)/2 bits = 1 and the other bits = 0
75+
IT highcolmask; // mask with the first log(n)/2 bits = 1 and the other bits = 0
7576
IT lowcolmask;
7677

7778
MortonCompare<IT> mortoncmp; // comparison operator w.r.t. the (inverted N)-morton layout

both_test.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,7 @@ using namespace std;
3131

3232
int main(int argc, char* argv[])
3333
{
34-
#ifndef CILK_STUB
34+
#if CILK==1
3535
int gl_nworkers = __cilkrts_get_nworkers();
3636
#else
3737
int gl_nworkers = 0;

csb_spmv_test.cpp

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,11 @@
1212
#include "cilk_util.h"
1313
#include "utility.h"
1414

15+
#ifndef RHSDIM
16+
#define RHSDIM 16
17+
#endif
18+
#define ALIGN 32
19+
1520
#include "triple.h"
1621
#include "csc.h"
1722
#include "bicsb.h"
@@ -32,7 +37,7 @@ using namespace std;
3237

3338
int main(int argc, char* argv[])
3439
{
35-
#ifndef CILK_STUB
40+
#if CILK==1
3641
int gl_nworkers = __cilkrts_get_nworkers();
3742
#else
3843
int gl_nworkers = 0;

csb_spmvt_test.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,7 @@ INDEXTYPE flops;
3030

3131
int main(int argc, char* argv[])
3232
{
33-
#ifndef CILK_STUB
33+
#if CILK==1
3434
int gl_nworkers = __cilkrts_get_nworkers();
3535
#else
3636
int gl_nworkers = 0;

csc.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -98,7 +98,7 @@ Csc<T,ITYPE>::~Csc()
9898
// (a) triples only contain the upper triangular part, or (b) the whole matrix
9999
template <class T, class ITYPE>
100100
Csc<T,ITYPE>::Csc(Triple<T, ITYPE> * triples, ITYPE size, ITYPE rows, ITYPE cols, bool isSym)
101-
:nz(size),m(rows),n(cols),issym(isSym)
101+
:issym(isSym), nz(size),m(rows),n(cols)
102102
{
103103
// Constructing empty Csc objects (size = 0) are not allowed.
104104
assert(size != 0 && n != 0);
@@ -174,7 +174,7 @@ Csc<T,ITYPE>::Csc(Triple<T, ITYPE> * triples, ITYPE size, ITYPE rows, ITYPE cols
174174
// Construct a Csc object from parallel arrays
175175
template <class T, class ITYPE>
176176
Csc<T,ITYPE>::Csc(ITYPE * ri, ITYPE * ci, T * val, ITYPE size, ITYPE rows, ITYPE cols, bool isSym)
177-
:nz(size),m(rows),n(cols),issym(isSym)
177+
:issym(isSym),nz(size),m(rows),n(cols)
178178
{
179179
// Constructing empty Csc objects (size = 0) are not allowed.
180180
assert(size != 0 && n != 0);

csc.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@ template <class T, class ITYPE>
1515
class Csc
1616
{
1717
public:
18-
Csc ():nz(0), m(0), n(0), logicalnz(0), issym(false) {} // default constructor
18+
Csc (): issym(false), logicalnz(0), nz(0), m(0), n(0) {} // default constructor
1919
Csc (ITYPE size,ITYPE rows, ITYPE cols, bool isSym=false);
2020
Csc (const Csc<T, ITYPE> & rhs); // copy constructor
2121
~Csc();

0 commit comments

Comments
 (0)