Skip to content

Commit b1bb9cf

Browse files
committed
Shared mem support / production improvements for o2sim_parallel
This commit provides several important changes towards a production-ready parallel o2sim (as described at CHEP18). Most notably, this concerns the support of a shared memory treatment of the hits in order to avoid costly ROOT serializations to communicate between simulation workers and the hit IO process. The solution put forward here works by creating a single sufficiently large shared memory region in which individual simulation workers get assigned a private space. The HitMerger can read from these spaces and write hits to file. Care is being taken in order to ensure that the shared memory is mapped at the same address in all processes in order to be able to share complex C++ objects. If this mapping fails, the system will automatically turn to using zmq + ROOT serialization mode. Some changes on the hit types needed to be done: - usage of a special allocator - move hits to classes instead of "using statements" in order to work with allocators - creation/destruction of hits is handled by some special allocation/deallocation functions. The commit contains other minor changes that move the o2sim_parallel a lot closer to a production-ready version. Beware that this is a first version. Some future cleanups/refactorings/improvements are expected. Importantly, we now have a working demonstrator for sharing complex data amongst multiple processes. Other minor changes include: - make forked sim mode default - better signal handling in driver executable - add passive detectors by default - make chunksize configurable - o2sim and o2sim_parallel give identical results due to special seeding which is applied for both
1 parent 085499e commit b1bb9cf

File tree

51 files changed

+1391
-114
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

51 files changed

+1391
-114
lines changed

Common/SimConfig/include/SimConfig/SimConfig.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ struct SimConfigData {
3838
// Foo.parameter1=x,Bar.parameter2=y,Baz.parameter3=hello
3939
// (can be used to **loosely** change any configuration parameter from
4040
// command-line)
41+
int mPrimaryChunkSize; // defining max granularity for input primaries of a sim job
42+
int mInternalChunkSize;
4143
ClassDefNV(SimConfigData, 1);
4244
};
4345

@@ -93,6 +95,8 @@ class SimConfig
9395
std::string getLogVerbosity() const { return mConfigData.mLogVerbosity; }
9496
std::string getLogSeverity() const { return mConfigData.mLogSeverity; }
9597
std::string getKeyValueString() const { return mConfigData.mKeyValueTokens; }
98+
int getPrimChunkSize() const { return mConfigData.mPrimaryChunkSize; }
99+
int getInternalChunkSize() const { return mConfigData.mInternalChunkSize; }
96100

97101
private:
98102
SimConfigData mConfigData; //!

Common/SimConfig/src/SimConfig.cxx

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ void SimConfig::initOptions(boost::program_options::options_description& options
3636
"outPrefix,o", bpo::value<std::string>()->default_value("o2sim"), "prefix of output files")(
3737
"logseverity", bpo::value<std::string>()->default_value("INFO"), "severity level for FairLogger")(
3838
"logverbosity", bpo::value<std::string>()->default_value("low"), "level of verbosity for FairLogger (low, medium, high, veryhigh)")(
39-
"configKeyValues", bpo::value<std::string>()->default_value(""), "comma separated key=value strings (e.g.: 'TPC.gasDensity=1,...");
39+
"configKeyValues", bpo::value<std::string>()->default_value(""), "comma separated key=value strings (e.g.: 'TPC.gasDensity=1,...")("chunkSize", bpo::value<unsigned int>()->default_value(10000), "max size of primary chunk (subevent) distributed by server")("chunkSizeI", bpo::value<int>()->default_value(-1), "internalChunkSize");
4040
}
4141

4242
bool SimConfig::resetFromParsedMap(boost::program_options::variables_map const& vm)
@@ -50,6 +50,13 @@ bool SimConfig::resetFromParsedMap(boost::program_options::variables_map const&
5050
for (int d = DetID::First; d <= DetID::Last; ++d) {
5151
active.emplace_back(DetID::getName(d));
5252
}
53+
// add passive components manually (make a PassiveDetID for them!)
54+
active.emplace_back("HALL");
55+
active.emplace_back("MAG");
56+
active.emplace_back("DIPO");
57+
active.emplace_back("PIPE");
58+
active.emplace_back("ABSO");
59+
active.emplace_back("SHIL");
5360
}
5461
// now we take out detectors listed as skipped
5562
auto& skipped = vm["skipModules"].as<std::vector<std::string>>();
@@ -73,6 +80,8 @@ bool SimConfig::resetFromParsedMap(boost::program_options::variables_map const&
7380
mConfigData.mLogSeverity = vm["logseverity"].as<std::string>();
7481
mConfigData.mLogVerbosity = vm["logverbosity"].as<std::string>();
7582
mConfigData.mKeyValueTokens = vm["configKeyValues"].as<std::string>();
83+
mConfigData.mPrimaryChunkSize = vm["chunkSize"].as<unsigned int>();
84+
mConfigData.mInternalChunkSize = vm["chunkSizeI"].as<int>();
7685
return true;
7786
}
7887

Common/Utils/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,15 @@ set(SRCS
77
src/TreeStreamRedirector.cxx
88
src/RootChain.cxx
99
src/CompStream.cxx
10+
src/ShmManager.cxx
1011
)
1112

1213
Set(HEADERS
1314
include/${MODULE_NAME}/TreeStream.h
1415
include/${MODULE_NAME}/TreeStreamRedirector.h
1516
include/${MODULE_NAME}/RootChain.h
1617
include/${MODULE_NAME}/BoostSerializer.h
18+
include/${MODULE_NAME}/ShmManager.h
1719
)
1820

1921
Set(LINKDEF src/CommonUtilsLinkDef.h)
Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
// Copyright CERN and copyright holders of ALICE O2. This software is
2+
// distributed under the terms of the GNU General Public License v3 (GPL
3+
// Version 3), copied verbatim in the file "COPYING".
4+
//
5+
// See http://alice-o2.web.cern.ch/license for full licensing information.
6+
//
7+
// In applying this license CERN does not waive the privileges and immunities
8+
// granted to it by virtue of its status as an Intergovernmental Organization
9+
// or submit itself to any jurisdiction.
10+
11+
/*
12+
* ShmAllocator.h
13+
*
14+
* Created on: Jun 18, 2018
15+
* Author: swenzel
16+
*/
17+
18+
#ifndef COMMON_UTILS_INCLUDE_COMMONUTILS_SHMALLOCATOR_H_
19+
#define COMMON_UTILS_INCLUDE_COMMONUTILS_SHMALLOCATOR_H_
20+
21+
#include "CommonUtils/ShmManager.h"
22+
#include <cassert>
23+
24+
namespace o2
25+
{
26+
namespace utils
27+
{
28+
29+
// An allocator placing objects in shared memory as organized by ShmManager.
30+
// Allocator used in a few std::vector so that they create stuff in shared mem
31+
// The only non-trivial things are in methods allocate/deallocate
32+
template <typename T>
33+
class ShmAllocator
34+
{
35+
public:
36+
typedef T value_type;
37+
typedef std::size_t size_type;
38+
typedef std::ptrdiff_t difference_type;
39+
40+
typedef T* pointer;
41+
typedef const T* const_pointer;
42+
43+
typedef T& reference;
44+
typedef const T& const_reference;
45+
46+
public:
47+
inline ShmAllocator() throw() {}
48+
49+
template <typename T2>
50+
inline ShmAllocator(const ShmAllocator<T2>&) throw()
51+
{
52+
}
53+
54+
inline ~ShmAllocator() throw() {}
55+
56+
inline pointer adress(reference r) { return &r; }
57+
58+
inline const_pointer adress(const_reference r) const { return &r; }
59+
60+
// the actually important functions:
61+
inline pointer allocate(size_type n)
62+
{
63+
auto& instance = ShmManager::Instance();
64+
if (instance.readyToAllocate()) {
65+
return (pointer)ShmManager::Instance().getmemblock(sizeof(value_type) * n);
66+
}
67+
return (pointer)malloc(sizeof(value_type) * n);
68+
}
69+
inline void deallocate(pointer p, size_type s)
70+
{
71+
auto& instance = ShmManager::Instance();
72+
if (instance.readyToAllocate()) {
73+
ShmManager::Instance().freememblock(p, s);
74+
} else {
75+
free(p);
76+
}
77+
}
78+
79+
inline void construct(pointer p, const value_type& value)
80+
{
81+
new (p) value_type(value);
82+
}
83+
84+
template <class U, class... Args>
85+
void construct(U* p, Args&&... args)
86+
{
87+
::new ((void*)p) U(std::forward<Args>(args)...);
88+
}
89+
90+
inline void destroy(pointer p) { p->~value_type(); }
91+
92+
inline size_type max_size() const throw() { return size_type(-1) / sizeof(value_type); }
93+
94+
template <typename T2>
95+
struct rebind {
96+
typedef ShmAllocator<T2> other;
97+
};
98+
99+
bool operator!=(const ShmAllocator<T>& other) const { return !(*this == other); }
100+
101+
// Returns true if and only if storage allocated from *this
102+
// can be deallocated from other, and vice versa.
103+
// Always returns true for stateless allocators.
104+
bool operator==(const ShmAllocator<T>& /*other*/) const { return true; }
105+
};
106+
107+
template <typename T>
108+
std::vector<T>* createSimVector()
109+
{
110+
using vector_t = std::vector<T>;
111+
auto& instance = o2::utils::ShmManager::Instance();
112+
if (instance.isOperational() && instance.readyToAllocate()) {
113+
auto placement = instance.hasSegment() ? instance.getmemblock(sizeof(vector_t)) : malloc(sizeof(vector_t));
114+
return new (placement) vector_t;
115+
} else {
116+
return new vector_t;
117+
}
118+
}
119+
120+
/// Counterpart of createSimVector().
/// Currently a no-op: the unconditional return at the top disables the release
/// logic below, so the vector behind @p ptr is never cleared or freed here.
template <typename T>
void freeSimVector(std::vector<T>* ptr)
{
  return;

  // ---- everything below is unreachable while the return above is in place ----
#ifdef USESHM
  auto& shm = o2::utils::ShmManager::Instance();
  // drop the elements and their storage before giving back the vector object
  ptr->clear();
  ptr->shrink_to_fit();
  const bool ownedByShm = shm.hasSegment() && shm.isPointerOk(ptr);
  if (ownedByShm) {
    shm.freememblock(ptr);
  } else {
    free(ptr);
  }
  // at this moment we have to trust that std:: (comment truncated in the original)
#else
  delete ptr;
#endif
}
139+
140+
} // end namespace utils
141+
} // end namespace o2
142+
#endif /* COMMON_UTILS_INCLUDE_COMMONUTILS_SHMALLOCATOR_H_ */
Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
// Copyright CERN and copyright holders of ALICE O2. This software is
2+
// distributed under the terms of the GNU General Public License v3 (GPL
3+
// Version 3), copied verbatim in the file "COPYING".
4+
//
5+
// See http://alice-o2.web.cern.ch/license for full licensing information.
6+
//
7+
// In applying this license CERN does not waive the privileges and immunities
8+
// granted to it by virtue of its status as an Intergovernmental Organization
9+
// or submit itself to any jurisdiction.
10+
11+
/*
12+
* ShmManager.h
13+
*
14+
* Created on: Jun 17, 2018
15+
* Author: swenzel
16+
*/
17+
18+
#ifndef COMMON_UTILS_INCLUDE_COMMONUTILS_SHMMANAGER_H_
19+
#define COMMON_UTILS_INCLUDE_COMMONUTILS_SHMMANAGER_H_
20+
21+
#include <atomic>
#include <cstddef>
#include <list>

#include <boost/interprocess/managed_external_buffer.hpp>
#include <boost/interprocess/allocators/allocator.hpp>
26+
27+
#define USESHM 1
28+
29+
namespace o2
30+
{
31+
namespace utils
32+
{
33+
34+
// the size dedicated to each attached worker/process
35+
constexpr size_t SHMPOOLSIZE = 200 * 1024 * 1024; // 200MB, expressed in bytes
36+
37+
// some meta info stored at the beginning of the global shared mem segment
38+
/// Meta information kept together with the shared memory segment.
/// All members start at zero.
struct ShmMetaInfo {
  // presumably the number of bytes handed out so far; not referenced in this header
  unsigned long long allocedbytes = 0;
  // atomic counter .. counter number of attached processes
  // and used to assign a subregion to the attached processes
  // (brace-initialised: copy-initialising a std::atomic is ill-formed before C++17)
  std::atomic<int> counter{0};
  // number of recorded failures; ShmManager::isOperational() requires this to be 0
  std::atomic<int> failures{0};
};
44+
45+
// Class creating -- or attaching to -- a shared memory pool
46+
// and manages allocations within the pool
47+
// This is used in the parallel simulation in order
48+
// to put hits directly in shared mem; I hope this can be replaced/refactored
49+
// to use directly functionality by FairMQ some day.
50+
// For the moment a wrapper around boost allocators ... enhancing them with some state.
51+
class ShmManager
52+
{
53+
public:
54+
static ShmManager& Instance()
55+
{
56+
static ShmManager instance;
57+
return instance;
58+
}
59+
60+
// creates a global shared mem region
61+
// to be used by "nsubsegments" simulation processes
62+
bool createGlobalSegment(int nsubsegments = 1);
63+
64+
// create the local segment
65+
// this will occupy a subregion of an already created global shared mem segment
66+
void occupySegment();
67+
68+
// simply attaches to the global segment
69+
bool attachToGlobalSegment();
70+
71+
// the equivalent of malloc
72+
void* getmemblock(size_t size);
73+
// the equivalent of free
74+
void freememblock(void*, std::size_t = 1);
75+
76+
void release();
77+
int getShmID() const { return mShmID; }
78+
bool hasSegment() const { return mShmID != -1; }
79+
bool readyToAllocate() const { return mShmID != -1 && mBufferPtr; }
80+
81+
// returns if pointer is part of the shm region under control of this manager
82+
bool isPointerOk(void* ptr) const
83+
{
84+
return mBufferPtr && getPointerOffset(ptr) < SHMPOOLSIZE;
85+
}
86+
87+
// returns if shared mem setup is correctly setup/operational
88+
// used to decide whether to communicate via shared mem at runtime or via
89+
// TMessages /etc/
90+
bool isOperational() const { return mSegInfoPtr && mSegInfoPtr->failures == 0; /* mIsOperational; */ }
91+
92+
void disable()
93+
{
94+
if (mSegInfoPtr) {
95+
mSegInfoPtr->failures.fetch_add(1);
96+
};
97+
}
98+
99+
void printSegInfo() const;
100+
101+
private:
102+
ShmManager();
103+
~ShmManager();
104+
int mShmID = -1; // id of shared mem created or used
105+
void* mBufferPtr = nullptr; // the mapped/start ptr of the buffer to use
106+
void* mSegPtr = nullptr; // address of the segment start
107+
ShmMetaInfo* mSegInfoPtr = nullptr; // pointing to the meta information object
108+
bool mIsMaster = false; // true if the manager who allocated the region
109+
bool mIsOperational = false;
110+
// helper function
111+
void* tryAttach(bool& success);
112+
size_t getPointerOffset(void* ptr) const { return (size_t)((char*)ptr - (char*)mBufferPtr); }
113+
114+
boost::interprocess::wmanaged_external_buffer* boostmanagedbuffer;
115+
boost::interprocess::allocator<char, boost::interprocess::wmanaged_external_buffer::segment_manager>* boostallocator;
116+
};
117+
118+
} // namespace utils
119+
} // namespace o2
120+
121+
#endif /* COMMON_UTILS_INCLUDE_COMMONUTILS_SHMMANAGER_H_ */

0 commit comments

Comments
 (0)