Skip to content

Commit e608207

Browse files
authored
Mergers: Retry with gDebug if merging fails (#15153)
...to help understand the error in QC-1340. I get the following with newly added test case: ``` Error in <TH1Merger::DifferentAxesMerge>: Cannot merge histograms - the histograms obj1 can extend the X axis or have different limits and underflows/overflows are present in the histogram obj2. [ERROR] Failed to merge the input object 'obj2' of type 'TH1I and the target object 'obj1' of type 'TH1I' Info in <TH1Merger::ExamineHistogram>: Examine histogram obj1 - labels 0 - same limits 1 - axis found 1 Info in <TH1Merger::ExamineHistogram>: Examine histogram obj2 - labels 0 - same limits 0 - axis found 1 Info in <Merge>: Histogram Merge type is 2 and new axis flag is 1 Info in <DefineNewAxis>: A new X axis has been defined Nbins=20 , [0.000000,20.000000] Info in <TH1Merger::DifferentAxesMerge>: Merging histogram obj1 into obj1 Info in <TH1Merger::DifferentAxesMerge>: Merging histogram obj2 into obj1 Error in <TH1Merger::DifferentAxesMerge>: Cannot merge histograms - the histograms obj1 can extend the X axis or have different limits and underflows/overflows are present in the histogram obj2. [ERROR] Merging 'obj2' and 'obj1' failed again after a retry for debugging purposes. See ROOT warnings for details. ``` What bothers me is that the ROOT error is actually printed there even before gDebug is enabled, while we see no ROOT error in the case of QC-1340, even though stderr output is redirected to infologger by o2-aliecs-executor. Perhaps whathever is triggering the error does not have an error log associated with it in ROOT?
1 parent bd3c02c commit e608207

File tree

2 files changed

+76
-39
lines changed

2 files changed

+76
-39
lines changed

Utilities/Mergers/src/MergerAlgorithm.cxx

Lines changed: 60 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,52 @@ auto matchCollectedToPairs(const std::vector<TObject*>& targetObjects, const std
9595
return matchedObjects;
9696
}
9797

98+
// calls the default Merge methods of TObjects
99+
Long64_t mergeDefault(TObject* const target, TObject* const other)
100+
{
101+
Long64_t errorCode = 0;
102+
103+
TObjArray otherCollection;
104+
otherCollection.SetOwner(false);
105+
otherCollection.Add(other);
106+
107+
if (target->InheritsFrom(TH1::Class())) {
108+
// this includes TH1, TH2, TH3
109+
auto targetTH1 = reinterpret_cast<TH1*>(target);
110+
if (targetTH1->TestBit(TH1::kIsAverage)) {
111+
// Merge() does not support averages, we have to use Add()
112+
// this will break if collection.size != 1
113+
if (auto otherTH1 = dynamic_cast<TH1*>(otherCollection.First())) {
114+
errorCode = targetTH1->Add(otherTH1);
115+
}
116+
} else {
117+
// Add() does not support histograms with labels, thus we resort to Merge() by default
118+
errorCode = targetTH1->Merge(&otherCollection);
119+
}
120+
} else if (target->InheritsFrom(THnBase::Class())) {
121+
// this includes THn and THnSparse
122+
errorCode = reinterpret_cast<THnBase*>(target)->Merge(&otherCollection);
123+
} else if (target->InheritsFrom(TTree::Class())) {
124+
auto targetTree = reinterpret_cast<TTree*>(target);
125+
auto otherTree = reinterpret_cast<TTree*>(other);
126+
auto targetTreeSize = estimateTreeSize(targetTree);
127+
auto otherTreeSize = estimateTreeSize(otherTree);
128+
if (auto totalSize = targetTreeSize + otherTreeSize; totalSize > 100000000) {
129+
LOG(warn) << "The tree '" << targetTree->GetName() << "' would be larger than 100MB (" << totalSize << "B) after merging, skipping to let the system survive";
130+
errorCode = 0;
131+
} else {
132+
errorCode = targetTree->Merge(&otherCollection);
133+
}
134+
} else if (target->InheritsFrom(TGraph::Class())) {
135+
errorCode = reinterpret_cast<TGraph*>(target)->Merge(&otherCollection);
136+
} else if (target->InheritsFrom(TEfficiency::Class())) {
137+
errorCode = reinterpret_cast<TEfficiency*>(target)->Merge(&otherCollection);
138+
} else {
139+
LOG(warn) << "Object '" + std::string(target->GetName()) + "' with type '" + std::string(target->ClassName()) + "' is not one of the mergeable types, skipping";
140+
}
141+
return errorCode;
142+
}
143+
98144
void merge(TObject* const target, TObject* const other)
99145
{
100146
if (target == nullptr) {
@@ -158,48 +204,23 @@ void merge(TObject* const target, TObject* const other)
158204
}
159205

160206
} else {
161-
Long64_t errorCode = 0;
162-
TObjArray otherCollection;
163-
otherCollection.SetOwner(false);
164-
otherCollection.Add(other);
165-
166-
if (target->InheritsFrom(TH1::Class())) {
167-
// this includes TH1, TH2, TH3
168-
auto targetTH1 = reinterpret_cast<TH1*>(target);
169-
if (targetTH1->TestBit(TH1::kIsAverage)) {
170-
// Merge() does not support averages, we have to use Add()
171-
// this will break if collection.size != 1
172-
if (auto otherTH1 = dynamic_cast<TH1*>(otherCollection.First())) {
173-
errorCode = targetTH1->Add(otherTH1);
174-
}
175-
} else {
176-
// Add() does not support histograms with labels, thus we resort to Merge() by default
177-
errorCode = targetTH1->Merge(&otherCollection);
178-
}
179-
} else if (target->InheritsFrom(THnBase::Class())) {
180-
// this includes THn and THnSparse
181-
errorCode = reinterpret_cast<THnBase*>(target)->Merge(&otherCollection);
182-
} else if (target->InheritsFrom(TTree::Class())) {
183-
auto targetTree = reinterpret_cast<TTree*>(target);
184-
auto otherTree = reinterpret_cast<TTree*>(other);
185-
auto targetTreeSize = estimateTreeSize(targetTree);
186-
auto otherTreeSize = estimateTreeSize(otherTree);
187-
if (auto totalSize = targetTreeSize + otherTreeSize; totalSize > 100000000) {
188-
LOG(warn) << "The tree '" << targetTree->GetName() << "' would be larger than 100MB (" << totalSize << "B) after merging, skipping to let the system survive";
189-
errorCode = 0;
190-
} else {
191-
errorCode = targetTree->Merge(&otherCollection);
192-
}
193-
} else if (target->InheritsFrom(TGraph::Class())) {
194-
errorCode = reinterpret_cast<TGraph*>(target)->Merge(&otherCollection);
195-
} else if (target->InheritsFrom(TEfficiency::Class())) {
196-
errorCode = reinterpret_cast<TEfficiency*>(target)->Merge(&otherCollection);
197-
} else {
198-
LOG(warn) << "Object '" + std::string(target->GetName()) + "' with type '" + std::string(target->ClassName()) + "' is not one of the mergeable types, skipping";
199-
}
207+
Long64_t errorCode = mergeDefault(target, other);
208+
200209
if (errorCode == -1) {
201210
LOG(error) << "Failed to merge the input object '" + std::string(other->GetName()) + "' of type '" + std::string(other->ClassName()) //
202211
+ " and the target object '" + std::string(target->GetName()) + "' of type '" + std::string(target->ClassName()) + "'";
212+
213+
// we retry with debug options enabled in ROOT in hopes to get some logs explaining the issue
214+
gDebug = true;
215+
errorCode = mergeDefault(target, other);
216+
gDebug = false;
217+
if (errorCode == -1) {
218+
LOG(error) << "Merging '" + std::string(other->GetName()) + "' and '" + std::string(target->GetName()) //
219+
+ "' failed again after a retry for debugging purposes. See ROOT warnings for details.";
220+
} else {
221+
LOG(warn) << "Merging '" + std::string(other->GetName()) + "' and '" + std::string(target->GetName()) //
222+
+ "' succeeded after retrying for debugging purposes.";
223+
}
203224
}
204225
}
205226
}

Utilities/Mergers/test/test_Algorithm.cxx

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,22 @@ BOOST_AUTO_TEST_CASE(MergerSingularObjects)
9797
delete other;
9898
delete target;
9999
}
100+
{
101+
// mismatching axes - merging should fail.
102+
// we should run again merging with gDebug enabled to see more logs from ROOT (tested only manually by visual inspection of logs)
103+
TH1I* target = new TH1I("obj1", "obj1", bins, min, max);
104+
target->Fill(5);
105+
TH1I* other = new TH1I("obj2", "obj2", bins, max, max + 10);
106+
other->Fill(2);
107+
other->Fill(2);
108+
109+
BOOST_CHECK_NO_THROW(algorithm::merge(target, other));
110+
BOOST_CHECK_EQUAL(target->GetBinContent(target->FindBin(2)), 0);
111+
BOOST_CHECK_EQUAL(target->GetBinContent(target->FindBin(5)), 1);
112+
113+
delete other;
114+
delete target;
115+
}
100116
{
101117
TH2I* target = new TH2I("obj1", "obj1", bins, min, max, bins, min, max);
102118
target->Fill(5, 5);

0 commit comments

Comments
 (0)