Skip to content

Commit 53bd030

Browse files
committed
DPL: add variables to DataDescriptorMatcher
This introduces the ability to build matchers that refer to some variable in a context. The context itself can either be prefilled, or filled on the fly on first match.
1 parent 83dcad1 commit 53bd030

File tree

5 files changed

+239
-91
lines changed

5 files changed

+239
-91
lines changed

Framework/Core/include/Framework/DataDescriptorMatcher.h

Lines changed: 113 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -16,28 +16,68 @@
1616
#include <cstdint>
1717
#include <string>
1818
#include <variant>
19+
#include <vector>
1920

2021
namespace o2
2122
{
2223
namespace framework
2324
{
25+
namespace data_matcher
26+
{
27+
28+
/// Marks an empty item in the context
29+
struct None {
30+
};
31+
32+
/// A typesafe reference to an element of the context.
33+
struct ContextRef {
34+
size_t index;
35+
};
36+
37+
/// An element of the matching context. Context itself is really a vector of
38+
/// those. It's up to the matcher builder to build the vector in a suitable way.
39+
/// We do not have any float in the value, because AFAICT there is no need for
40+
/// it in the O2 DataHeader, however we could add it later on.
41+
struct ContextElement {
42+
std::string label; /// The name of the variable contained in this element.
43+
std::variant<uint64_t, std::string, None> value; /// The actual contents of the element.
44+
};
2445

2546
/// Something which can be matched against a header::DataOrigin
2647
class OriginValueMatcher
2748
{
2849
public:
50+
/// Initialise the matcher with an actual value
2951
OriginValueMatcher(std::string const& s)
3052
: mValue{ s }
3153
{
3254
}
3355

34-
bool match(header::DataHeader const& header) const
56+
/// Initialise the matcher with a reference to a context variable: on match, the
57+
/// variable is filled from the header if it holds no string yet, otherwise its value is compared.
58+
OriginValueMatcher(ContextRef variableId)
59+
: mValue{ variableId }
3560
{
36-
return strncmp(header.dataOrigin.str, mValue.c_str(), 4) == 0;
61+
}
62+
63+
bool match(header::DataHeader const& header, std::vector<ContextElement>& context) const
64+
{
65+
if (auto ref = std::get_if<ContextRef>(&mValue)) {
66+
auto& variable = context.at(ref->index);
67+
if (auto value = std::get_if<std::string>(&variable.value)) {
68+
return strncmp(header.dataOrigin.str, value->c_str(), 4) == 0;
69+
}
70+
auto maxSize = strnlen(header.dataOrigin.str, 4);
71+
variable.value = std::string(header.dataOrigin.str, maxSize);
72+
return true;
73+
} else if (auto s = std::get_if<std::string>(&mValue)) {
74+
return strncmp(header.dataOrigin.str, s->c_str(), 4) == 0;
75+
}
76+
throw std::runtime_error("Mismatching type for variable");
3777
}
3878

3979
private:
40-
std::string mValue;
80+
std::variant<std::string, ContextRef> mValue;
4181
};
4282

4383
/// Something which can be matched against a header::DataDescription
@@ -49,13 +89,31 @@ class DescriptionValueMatcher
4989
{
5090
}
5191

52-
bool match(header::DataHeader const& header) const
92+
/// Initialise the matcher with a reference to a context variable: on match, the
93+
/// variable is filled from the header if it holds no string yet, otherwise its value is compared.
94+
DescriptionValueMatcher(ContextRef ref)
95+
: mValue{ ref }
96+
{
97+
}
98+
99+
bool match(header::DataHeader const& header, std::vector<ContextElement>& context) const
53100
{
54-
return strncmp(header.dataDescription.str, mValue.c_str(), 8) == 0;
101+
if (auto ref = std::get_if<ContextRef>(&mValue)) {
102+
auto& variable = context.at(ref->index);
103+
if (auto value = std::get_if<std::string>(&variable.value)) {
104+
return strncmp(header.dataDescription.str, value->c_str(), 16) == 0;
105+
}
106+
auto maxSize = strnlen(header.dataDescription.str, 16);
107+
variable.value = std::string(header.dataDescription.str, maxSize);
108+
return true;
109+
} else if (auto s = std::get_if<std::string>(&mValue)) {
110+
return strncmp(header.dataDescription.str, s->c_str(), 16) == 0;
111+
}
112+
throw std::runtime_error("Mismatching type for variable");
55113
}
56114

57115
private:
58-
std::string mValue;
116+
std::variant<std::string, ContextRef> mValue;
59117
};
60118

61119
/// Something which can be matched against a header::SubSpecificationType
@@ -65,21 +123,40 @@ class SubSpecificationTypeValueMatcher
65123
/// The passed string @a s is the expected numerical value for
66124
/// the SubSpecification type.
67125
SubSpecificationTypeValueMatcher(std::string const& s)
126+
: SubSpecificationTypeValueMatcher(strtoull(s.c_str(), nullptr, 10))
68127
{
69-
mValue = strtoull(s.c_str(), nullptr, 10);
70128
}
71129

130+
/// This means that the matcher is looking for a constant.
72131
SubSpecificationTypeValueMatcher(uint64_t v)
132+
: mValue{ v }
73133
{
74-
mValue = v;
75134
}
76-
bool match(header::DataHeader const& header) const
135+
136+
/// Initialise the matcher with a reference to a context variable: on match, the
137+
/// variable is filled from the header if it holds no integer yet, otherwise its value is compared.
138+
SubSpecificationTypeValueMatcher(ContextRef ref)
139+
: mValue{ ref }
77140
{
78-
return header.subSpecification == mValue;
141+
}
142+
143+
bool match(header::DataHeader const& header, std::vector<ContextElement>& context) const
144+
{
145+
if (auto ref = std::get_if<ContextRef>(&mValue)) {
146+
auto& variable = context.at(ref->index);
147+
if (auto value = std::get_if<uint64_t>(&variable.value)) {
148+
return header.subSpecification == *value;
149+
}
150+
variable.value = header.subSpecification;
151+
return true;
152+
} else if (auto v = std::get_if<uint64_t>(&mValue)) {
153+
return header.subSpecification == *v;
154+
}
155+
throw std::runtime_error("Mismatching type for variable");
79156
}
80157

81158
private:
82-
uint64_t mValue;
159+
std::variant<uint64_t, ContextRef> mValue;
83160
};
84161

85162
/// Something which does not inspect the header: match() simply returns the stored value.
@@ -92,6 +169,7 @@ class ConstantValueMatcher
92169
{
93170
mValue = value;
94171
}
172+
95173
bool match(header::DataHeader const& header) const
96174
{
97175
return mValue;
@@ -107,7 +185,7 @@ struct DescriptorMatcherTrait {
107185

108186
template <>
109187
struct DescriptorMatcherTrait<header::DataOrigin> {
110-
using Matcher = framework::OriginValueMatcher;
188+
using Matcher = OriginValueMatcher;
111189
};
112190

113191
template <>
@@ -147,33 +225,34 @@ class DataDescriptorMatcher
147225

148226
/// @return true if the (sub-)query associated to this matcher will
149227
/// match the provided @a spec, false otherwise.
150-
bool match(InputSpec const& spec) const
228+
bool match(InputSpec const& spec, std::vector<ContextElement>& context) const
151229
{
152230
header::DataHeader dh;
153231
dh.dataOrigin = spec.origin;
154232
dh.dataDescription = spec.description;
155233
dh.subSpecification = spec.subSpec;
156234

157-
return this->match(dh);
235+
return this->match(dh, context);
158236
}
159237

160-
bool match(header::DataHeader const& d) const
238+
bool match(header::DataHeader const& d, std::vector<ContextElement>& context) const
161239
{
162-
auto eval = [&d](auto&& arg) -> bool {
163-
using T = std::decay_t<decltype(arg)>;
164-
if constexpr (std::is_same_v<T, std::unique_ptr<DataDescriptorMatcher>>) {
165-
return arg->match(d);
166-
} else {
167-
return arg.match(d);
168-
}
169-
};
170-
171240
bool leftValue = false, rightValue = false;
172241

173242
// FIXME: Using std::visit is not ABI compatible due to a new
174243
// exception being thrown. This is the ABI compatible version.
175244
// Replace with:
176245
//
246+
// auto eval = [&d, &context](auto&& arg) -> bool {
247+
// using T = std::decay_t<decltype(arg)>;
248+
// if constexpr (std::is_same_v<T, std::unique_ptr<DataDescriptorMatcher>>) {
249+
// return arg->match(d, context);
250+
// } else if constexpr (std::is_same_v<T, ConstantValueMatcher>) {
251+
// return arg.match(d);
252+
// } else {
253+
// return arg.match(d, context);
254+
// }
255+
// };
177256
// switch (mOp) {
178257
// case Op::Or:
179258
// return std::visit(eval, mLeft) || std::visit(eval, mRight);
@@ -185,29 +264,28 @@ class DataDescriptorMatcher
185264
// return std::visit(eval, mLeft);
186265
// }
187266
// When we drop support for macOS 10.13
188-
189267
if (auto pval0 = std::get_if<OriginValueMatcher>(&mLeft)) {
190-
leftValue = pval0->match(d);
268+
leftValue = pval0->match(d, context);
191269
} else if (auto pval1 = std::get_if<DescriptionValueMatcher>(&mLeft)) {
192-
leftValue = pval1->match(d);
270+
leftValue = pval1->match(d, context);
193271
} else if (auto pval2 = std::get_if<SubSpecificationTypeValueMatcher>(&mLeft)) {
194-
leftValue = pval2->match(d);
272+
leftValue = pval2->match(d, context);
195273
} else if (auto pval3 = std::get_if<std::unique_ptr<DataDescriptorMatcher>>(&mLeft)) {
196-
leftValue = (*pval3)->match(d);
274+
leftValue = (*pval3)->match(d, context);
197275
} else if (auto pval4 = std::get_if<ConstantValueMatcher>(&mLeft)) {
198276
leftValue = pval4->match(d);
199277
} else {
200278
throw std::runtime_error("Bad parsing tree");
201279
}
202280

203281
if (auto pval0 = std::get_if<OriginValueMatcher>(&mRight)) {
204-
rightValue = pval0->match(d);
282+
rightValue = pval0->match(d, context);
205283
} else if (auto pval1 = std::get_if<DescriptionValueMatcher>(&mRight)) {
206-
rightValue = pval1->match(d);
284+
rightValue = pval1->match(d, context);
207285
} else if (auto pval2 = std::get_if<SubSpecificationTypeValueMatcher>(&mRight)) {
208-
rightValue = pval2->match(d);
286+
rightValue = pval2->match(d, context);
209287
} else if (auto pval3 = std::get_if<std::unique_ptr<DataDescriptorMatcher>>(&mRight)) {
210-
rightValue = (*pval3)->match(d);
288+
rightValue = (*pval3)->match(d, context);
211289
} else if (auto pval4 = std::get_if<ConstantValueMatcher>(&mRight)) {
212290
rightValue = pval4->match(d);
213291
}
@@ -231,7 +309,8 @@ class DataDescriptorMatcher
231309
Node mRight;
232310
};
233311

234-
} // naemspace framework
312+
} // namespace data_matcher
313+
} // namespace framework
235314
} // namespace o2
236315

237316
#endif // o2_framework_DataDescriptorMatcher_H_INCLUDED

Framework/Core/include/Framework/DataDescriptorQueryBuilder.h

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,22 @@ namespace o2
1919
namespace framework
2020
{
2121

22+
namespace data_matcher
23+
{
2224
class DataDescriptorMatcher;
25+
}
26+
27+
/// Struct describing a query.
28+
/// @a variableNames is filled with the variables which are
29+
/// referenced in the matcher string. We return it as part of the query so that it can
30+
/// be eventually passed to a different query builder which wants to use the
31+
/// same variable names. Alternatively we could simply build the query in
32+
/// one go and return the number of
33+
/// variables required by the context. Not sure what's the best approach.
34+
struct DataDescriptorQuery {
35+
std::vector<std::string> variableNames;
36+
std::shared_ptr<data_matcher::DataDescriptorMatcher> matcher;
37+
};
2338

2439
/// Various utilities to manipulate InputSpecs
2540
struct DataDescriptorQueryBuilder {
@@ -34,7 +49,7 @@ struct DataDescriptorQueryBuilder {
3449
/// config := spec,spec,...
3550
///
3651
/// Example for config: TPC/CLUSTER/0,ITS/TRACKS/1
37-
static std::shared_ptr<DataDescriptorMatcher> buildFromKeepConfig(std::string const& config);
52+
static DataDescriptorQuery buildFromKeepConfig(std::string const& config);
3853
};
3954

4055
} // namespace framework

Framework/Core/src/CommonDataProcessors.cxx

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@
2626
#include <memory>
2727
#include <string>
2828

29+
using namespace o2::framework::data_matcher;
30+
2931
namespace o2
3032
{
3133
namespace framework
@@ -43,9 +45,10 @@ DataProcessorSpec CommonDataProcessors::getGlobalFileSink(std::vector<InputSpec>
4345
}
4446

4547
bool hasOutputsToWrite = false;
46-
auto outputMatcher = DataDescriptorQueryBuilder::buildFromKeepConfig(keepString);
48+
auto [variables, outputMatcher] = DataDescriptorQueryBuilder::buildFromKeepConfig(keepString);
49+
std::vector<ContextElement> context(variables.size());
4750
for (auto& spec : danglingOutputInputs) {
48-
if (outputMatcher->match(spec)) {
51+
if (outputMatcher->match(spec, context)) {
4952
hasOutputsToWrite = true;
5053
}
5154
}
@@ -62,13 +65,14 @@ DataProcessorSpec CommonDataProcessors::getGlobalFileSink(std::vector<InputSpec>
6265
});
6366
}
6467
auto output = std::make_shared<std::ofstream>(filename.c_str(), std::ios_base::binary);
65-
return std::move([ output, matcher = outputMatcher ](ProcessingContext & pc) mutable->void {
68+
return std::move([ output, matcher = outputMatcher, contextSize = variables.size() ](ProcessingContext & pc) mutable->void {
69+
std::vector<ContextElement> matchingContext(contextSize);
6670
LOG(INFO) << "processing data set with " << pc.inputs().size() << " entries";
6771
for (const auto& entry : pc.inputs()) {
6872
LOG(INFO) << " " << *(entry.spec);
6973
auto header = DataRefUtils::getHeader<header::DataHeader*>(entry);
7074
auto dataProcessingHeader = DataRefUtils::getHeader<DataProcessingHeader*>(entry);
71-
if (matcher->match(*header) == false) {
75+
if (matcher->match(*header, matchingContext) == false) {
7276
continue;
7377
}
7478
output->write(reinterpret_cast<char const*>(header), sizeof(header::DataHeader));

Framework/Core/src/DataDescriptorQueryBuilder.cxx

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,14 @@
1616
#include <string>
1717
#include <vector>
1818

19+
using namespace o2::framework::data_matcher;
20+
1921
namespace o2
2022
{
2123
namespace framework
2224
{
2325

24-
std::shared_ptr<DataDescriptorMatcher> DataDescriptorQueryBuilder::buildFromKeepConfig(std::string const& config)
26+
DataDescriptorQuery DataDescriptorQueryBuilder::buildFromKeepConfig(std::string const& config)
2527
{
2628
static const std::regex specTokenRE(R"re((\w{1,4})/(\w{1,16})/(\d*))re");
2729
static const std::regex delimiter(",");
@@ -55,7 +57,8 @@ std::shared_ptr<DataDescriptorMatcher> DataDescriptorQueryBuilder::buildFromKeep
5557
result = std::move(next);
5658
}
5759
}
58-
return std::move(result);
60+
61+
return std::move(DataDescriptorQuery{ {}, std::move(result) });
5962
}
6063

6164
} // namespace framework

0 commit comments

Comments
 (0)