Skip to content

Commit bb9a4a2

Browse files
Benedikt Volkelsawenzel
authored and committed
InfluxDB and other updates
* add conversion to files that can be uploaded to InfluxDB * revise description comment at the top of the script * provide global summary in "tabular" JSON format when comparing 2 simulation directories * small updates * possible to inspect global summary from the comparison of 2 sim directories * skip non-existent files in hit comparison (before e.g. ZDC comparison was done although it was deactivated) * couple of minor fixes
1 parent 1674b3d commit bb9a4a2

File tree

1 file changed

+173
-44
lines changed

1 file changed

+173
-44
lines changed

RelVal/o2dpg_release_validation.py

Lines changed: 173 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,63 @@
11
#!/usr/bin/env python3
22
#
3-
# This is a short script to conveniently wrap the ROOT macro used for release validation comparisons
3+
# Basically, this script allows a user to compare
4+
# 1. 2 corresponding ROOT files containing either histograms or QC Monitoring objects
5+
# 2. 2 corresponding simulation directories
46
#
5-
# Help message:
6-
# usage: o2dpg_release_validation.py [-h] -f INPUT_FILES INPUT_FILES -t {1,2,3,4,5,6,7} [--chi2-value CHI2_VALUE] [--rel-bc-diff REL_BC_DIFF] [--rel-entries-diff REL_ENTRIES_DIFF] [--select-critical]
7-
#
8-
# Wrapping ReleaseValidation macro
7+
# The RelVal suite is run with
8+
# o2dpg_release_validation.py rel-val -i <file-or-sim-dir1> <file-or-sim-dir2>
9+
#
10+
# If 2 sim directories should be compared, it is possible to specify for which parts the RelVal should be done by adding the flags
11+
# --with-<which-part>
12+
# (see full help message below to see available options)
13+
#
14+
# usage: o2dpg_release_validation.py rel-val [-h] -i INPUT INPUT -t
15+
# {1,2,3,4,5,6,7}
16+
# [--chi2-value CHI2_VALUE]
17+
# [--rel-mean-diff REL_MEAN_DIFF]
18+
# [--rel-entries-diff REL_ENTRIES_DIFF]
19+
# [--select-critical]
20+
# [--threshold THRESHOLD]
21+
# [--with-hits]
22+
# [--detectors [{ITS,TOF,EMC,TRD,PHS,FT0,HMP,MFT,FDD,FV0,MCH,MID,CPV,ZDC,TPC} [{ITS,TOF,EMC,TRD,PHS,FT0,HMP,MFT,FDD,FV0,MCH,MID,CPV,ZDC,TPC} ...]]]
23+
# [--with-tpctracks] [--with-kine]
24+
# [--with-analysis] [--with-qc]
25+
# [--no-plots] [--output OUTPUT]
926
#
1027
# optional arguments:
1128
# -h, --help show this help message and exit
12-
# -f INPUT_FILES INPUT_FILES, --input-files INPUT_FILES INPUT_FILES
13-
# input files for comparison
29+
# -i INPUT INPUT, --input INPUT INPUT
30+
# 2 input files for comparison OR 2 input directories
31+
# from simulation for comparison
1432
# -t {1,2,3,4,5,6,7}, --test {1,2,3,4,5,6,7}
1533
# index of test case
1634
# --chi2-value CHI2_VALUE
1735
# Chi2 threshold
18-
# --rel-bc-diff REL_BC_DIFF
19-
# Threshold of relative difference in normalised bin content
36+
# --rel-mean-diff REL_MEAN_DIFF
37+
# Threshold of relative difference in mean
2038
# --rel-entries-diff REL_ENTRIES_DIFF
2139
# Threshold of relative difference in number of entries
2240
# --select-critical Select the critical histograms and dump to file
23-
41+
# --threshold THRESHOLD
42+
# threshold for how far file sizes are allowed to
43+
# diverge before warning
44+
# --with-hits include hit comparison when RelVal when run on
45+
# simulation directories
46+
# --detectors [{ITS,TOF,EMC,TRD,PHS,FT0,HMP,MFT,FDD,FV0,MCH,MID,CPV,ZDC,TPC} [{ITS,TOF,EMC,TRD,PHS,FT0,HMP,MFT,FDD,FV0,MCH,MID,CPV,ZDC,TPC} ...]]
47+
# include these detectors for hit RelVal
48+
# --with-tpctracks include TPC tracks RelVal when run on simulation
49+
# directories
50+
# --with-kine include kine RelVal when run on simulation directories
51+
# --with-analysis include analysis RelVal when run on simulation
52+
# directories
53+
# --with-qc include QC RelVal when run on simulation directories
54+
# --no-plots disable plotting
55+
# --output OUTPUT, -o OUTPUT
56+
# output directory
2457
import sys
2558
import argparse
2659
from os import environ, makedirs
27-
from os.path import join, abspath, exists, isfile, isdir
60+
from os.path import join, abspath, exists, isfile, isdir, dirname
2861
from glob import glob
2962
from subprocess import Popen
3063
from pathlib import Path
@@ -44,6 +77,9 @@
4477
from ROOT import TFile, gDirectory, gROOT, TChain
4578

4679
DETECTORS_OF_INTEREST_HITS = ["ITS", "TOF", "EMC", "TRD", "PHS", "FT0", "HMP", "MFT", "FDD", "FV0", "MCH", "MID", "CPV", "ZDC", "TPC"]
80+
81+
REL_VAL_SEVERITY_MAP = {"GOOD": 0, "WARNING": 1, "NONCRIT_NC": 2, "CRIT_NC": 3, "BAD": 4}
82+
4783
gROOT.SetBatch()
4884

4985
def is_sim_dir(path):
@@ -259,31 +295,44 @@ def has_severity(filename, severity=("BAD", "CRIT_NC")):
259295
"""
260296
Check if any 2 histograms have a given severity level after RelVal
261297
"""
298+
def rel_val_summary(d):
299+
ret = False
300+
for s in severity:
301+
names = d.get(s)
302+
if not names:
303+
continue
304+
print(f"Histograms for severity {s}:")
305+
for n in names:
306+
print(f" {n}")
307+
ret = True
308+
return ret
309+
310+
def rel_val_summary_global(d):
311+
ret = False
312+
to_print = {k: [] for k in severity}
313+
for s in severity:
314+
for h in d:
315+
if h["test_summary"] in severity:
316+
to_print[s].append(h["name"])
317+
ret = True
318+
for s, names in to_print.items():
319+
if not names:
320+
continue
321+
print(f"Histograms for severity {s}:")
322+
for n in names:
323+
print(f" {n}")
324+
return ret
325+
262326
res = None
263-
ret = False
264327
with open(filename, "r") as f:
265328
# NOTE For now care about the summary. However, we have each test individually, so we could do a more detailed check in the future
266-
res = json.load(f)["test_summary"]
267-
for s in severity:
268-
names = res.get(s)
269-
if not names:
270-
continue
271-
print(f"Histograms for severity {s}:")
272-
for n in names:
273-
print(f" {n}")
274-
ret = True
275-
return ret
329+
res = json.load(f)
276330

331+
# decide whether that is an overall summary or from 2 files only
332+
if "histograms" in res:
333+
return rel_val_summary_global(res["histograms"])
334+
return rel_val_summary(res["test_summary"])
277335

278-
def add_to_summary(paths_to_summary, key, summary_dict):
279-
"""
280-
Extend a list of paths with given severity
281-
"""
282-
summary_list = []
283-
for p in paths_to_summary:
284-
if has_severity(p):
285-
summary_list.append(p)
286-
summary_dict[key] = summary_list
287336

288337
def rel_val_ttree(dir1, dir2, files, output_dir, args, treename="o2sim", *, combine_patterns=None):
289338
"""
@@ -298,15 +347,19 @@ def rel_val_ttree(dir1, dir2, files, output_dir, args, treename="o2sim", *, comb
298347
# possibly combine common files, for instance when they come from different timeframes
299348
if combine_patterns:
300349
for cp in combine_patterns:
301-
to_be_chained1.append([join(dir1, hf) for hf in files if cp in hf])
302-
to_be_chained2.append([join(dir2, hf) for hf in files if cp in hf])
350+
chained1 = [join(dir1, hf) for hf in files if cp in hf]
351+
chained2 = [join(dir2, hf) for hf in files if cp in hf]
352+
if not chained1 or not chained2:
353+
continue
354+
to_be_chained1.append(chained1)
355+
to_be_chained2.append(chained2)
303356
output_dirs.append(f"{cp}_dir")
304357
else:
305358
to_be_chained1 = []
306359
to_be_chained2 = []
307360
for hf in files:
308361
to_be_chained1.append(join(dir1, hf))
309-
to_be_chained2.append(join(dir1, hf))
362+
to_be_chained2.append(join(dir2, hf))
310363
output_dirs.append(f"{hf}_dir")
311364

312365
# paths for chains prepared, output directory names specified, do RelVal
@@ -321,6 +374,39 @@ def rel_val_ttree(dir1, dir2, files, output_dir, args, treename="o2sim", *, comb
321374
return 0
322375

323376

377+
def make_summary(in_dir):
378+
"""
379+
Make a summary per histogram (that should be able to be parsed by Grafana eventually)
380+
"""
381+
file_paths = glob(f"{in_dir}/**/Summary.json", recursive=True)
382+
summary = []
383+
384+
for path in file_paths:
385+
# go through all we found
386+
current_summary = None
387+
print(path)
388+
with open(path, "r") as f:
389+
current_summary = json.load(f)
390+
# remove the file name, used as the top key for this collection
391+
rel_val_path = "/".join(path.split("/")[:-1])
392+
type_global = path.split("/")[1]
393+
type_specific = "/".join(path.split("/")[1:-1])
394+
make_summary = {}
395+
for which_test, flagged_histos in current_summary.items():
396+
# loop over tests done
397+
for flag, histos in flagged_histos.items():
398+
# loop over flags per test
399+
for h in histos:
400+
if h not in make_summary:
401+
# re-arrange to have histogram at the sop
402+
make_summary[h] = {"name": h, "type_global": type_global, "type_specific": type_specific}
403+
# add outcome of test
404+
make_summary[h][which_test] = flag
405+
# re-arrange to list, now each summary["path"] basically contains "rows" and each batch represents the columns
406+
summary.extend([batch for batch in make_summary.values()])
407+
return {"histograms": summary}
408+
409+
324410
def rel_val_histograms(dir1, dir2, files, output_dir, args):
325411
"""
326412
Simply another wrapper to combine multiple files where we expect them to contain histograms already
@@ -359,7 +445,6 @@ def rel_val_sim_dirs(args):
359445
if not exists(output_dir_hits):
360446
makedirs(output_dir_hits)
361447
rel_val_ttree(dir1, dir2, hit_files, output_dir_hits, args, combine_patterns=[f"Hits{d}" for d in args.detectors])
362-
add_to_summary(glob(f"{output_dir_hits}/**/{look_for}", recursive=True), "hist", summary_dict)
363448

364449
# TPC tracks
365450
if args.with_tpctracks:
@@ -368,7 +453,6 @@ def rel_val_sim_dirs(args):
368453
if not exists(output_dir_tpctracks):
369454
makedirs(output_dir_tpctracks)
370455
rel_val_ttree(dir1, dir2, tpctrack_files, output_dir_tpctracks, args, "tpcrec", combine_patterns=["tpctracks.root"])
371-
add_to_summary(glob(f"{output_dir_tpctracks}/**/{look_for}", recursive=True), "tpctracks", summary_dict)
372456

373457
# TPC tracks
374458
if args.with_kine:
@@ -377,7 +461,6 @@ def rel_val_sim_dirs(args):
377461
if not exists(output_dir_kine):
378462
makedirs(output_dir_kine)
379463
rel_val_ttree(dir1, dir2, kine_files, output_dir_kine, args, combine_patterns=["Kine.root"])
380-
add_to_summary(glob(f"{output_dir_kine}/**/{look_for}", recursive=True), "kine", summary_dict)
381464

382465
# Analysis
383466
if args.with_analysis:
@@ -389,7 +472,6 @@ def rel_val_sim_dirs(args):
389472
if not exists(output_dir_analysis):
390473
makedirs(output_dir_analysis)
391474
rel_val_histograms(dir_analysis1, dir_analysis2, analysis_files, output_dir_analysis, args)
392-
add_to_summary(glob(f"{output_dir_analysis}/**/{look_for}", recursive=True), "analysis", summary_dict)
393475

394476
# QC
395477
if args.with_qc:
@@ -400,10 +482,10 @@ def rel_val_sim_dirs(args):
400482
if not exists(output_dir_qc):
401483
makedirs(output_dir_qc)
402484
rel_val_histograms(dir_qc1, dir_qc2, qc_files, output_dir_qc, args)
403-
add_to_summary(glob(f"{output_dir_qc}/**/{look_for}", recursive=True), "qc", summary_dict)
404485

405-
with open(join(output_dir, "SummaryToBeChecked.json"), "w") as f:
406-
json.dump(summary_dict, f, indent=2)
486+
with open(join(output_dir, "SummaryGlobal.json"), "w") as f:
487+
json.dump(make_summary(output_dir), f, indent=2)
488+
407489

408490
def rel_val(args):
409491
"""
@@ -429,6 +511,47 @@ def inspect(args):
429511
"""
430512
return has_severity(args.file, args.severity)
431513

514+
515+
def influx(args):
516+
"""
517+
Create an influxDB metrics file
518+
"""
519+
output_dir = args.dir
520+
json_in = join(output_dir, "SummaryGlobal.json")
521+
if not exists(json_in):
522+
print(f"Cannot find expected JSON summary {json_in}.")
523+
return 1
524+
525+
table_name = f"{args.table_prefix}_ReleaseValidation"
526+
tags_out = ""
527+
if args.tags:
528+
for t in args.tags:
529+
t_split = t.split("=")
530+
if len(t_split) != 2:
531+
print(f"ERROR: Invalid format of tags {t} for InfluxDB")
532+
return 1
533+
# we take it apart and put it back together again to make sure there are no whitespaces etc
534+
tags_out += f",{t_split[0].strip()}={t_split[1].strip()}"
535+
536+
# always the same
537+
row_tags = table_name + tags_out
538+
539+
out_file = join(output_dir, "influxDB.dat")
540+
541+
in_list = None
542+
with open(json_in, "r") as f:
543+
in_list = json.load(f)["histograms"]
544+
with open(out_file, "w") as f:
545+
for h in in_list:
546+
s = f"{row_tags},type_global={h['type_global']},type_specific={h['type_specific']} histogram_name={h['name']}"
547+
for k, v in h.items():
548+
# add all tests - do it dynamically because more might be added in the future
549+
if "test_" not in k:
550+
continue
551+
s += f",{k}={REL_VAL_SEVERITY_MAP[v]}"
552+
f.write(f"{s}\n")
553+
554+
432555
def main():
433556
"""entry point when run directly from command line"""
434557
parser = argparse.ArgumentParser(description='Wrapping ReleaseValidation macro')
@@ -438,7 +561,7 @@ def main():
438561

439562
sub_parsers = parser.add_subparsers(dest="command")
440563
rel_val_parser = sub_parsers.add_parser("rel-val", parents=[common_file_parser])
441-
rel_val_parser.add_argument("-t", "--test", type=int, help="index of test case", choices=list(range(1, 8)), required=True)
564+
rel_val_parser.add_argument("-t", "--test", type=int, help="index of test case", choices=list(range(1, 8)), default=7)
442565
rel_val_parser.add_argument("--chi2-value", dest="chi2_value", type=float, help="Chi2 threshold", default=1.5)
443566
rel_val_parser.add_argument("--rel-mean-diff", dest="rel_mean_diff", type=float, help="Threshold of relative difference in mean", default=1.5)
444567
rel_val_parser.add_argument("--rel-entries-diff", dest="rel_entries_diff", type=float, help="Threshold of relative difference in number of entries", default=0.01)
@@ -451,14 +574,20 @@ def main():
451574
rel_val_parser.add_argument("--with-analysis", dest="with_analysis", action="store_true", help="include analysis RelVal when run on simulation directories")
452575
rel_val_parser.add_argument("--with-qc", dest="with_qc", action="store_true", help="include QC RelVal when run on simulation directories")
453576
rel_val_parser.add_argument("--no-plots", dest="no_plots", action="store_true", help="disable plotting")
454-
rel_val_parser.add_argument("--output", "-o", help="output directory", default="./")
577+
rel_val_parser.add_argument("--output", "-o", help="output directory", default="rel_val")
455578
rel_val_parser.set_defaults(func=rel_val)
456579

457580
inspect_parser = sub_parsers.add_parser("inspect")
458581
inspect_parser.add_argument("file", help="pass a JSON produced from ReleaseValidation (rel-val)")
459-
inspect_parser.add_argument("--severity", nargs="*", default=["BAD", "CRIT_NC"], choices=["GOOD", "WARNING", "BAD", "CRIT_NC", "NONCRIT_NC"], help="Choose severity levels to search for")
582+
inspect_parser.add_argument("--severity", nargs="*", default=["BAD", "CRIT_NC"], choices=REL_VAL_SEVERITY_MAP.keys(), help="Choose severity levels to search for")
460583
inspect_parser.set_defaults(func=inspect)
461584

585+
influx_parser = sub_parsers.add_parser("influx")
586+
influx_parser.add_argument("--dir", help="directory where ReleaseValidation was run", required=True)
587+
influx_parser.add_argument("--tags", nargs="*", help="tags to be added for influx, list of key=value")
588+
influx_parser.add_argument("--table-prefix", dest="table_prefix", help="prefix for table name", default="O2DPG_MC")
589+
influx_parser.set_defaults(func=influx)
590+
462591
args = parser.parse_args()
463592
return(args.func(args))
464593

0 commit comments

Comments
 (0)