Skip to content

Commit ed7cc86

Browse files
benedikt-voelkelBenedikt Volkel
andauthored
[Anchor] Add test for anchored MC (#1464)
Runs a very simple anchored production, 2TFs, 50 pp events each Co-authored-by: Benedikt Volkel <benedikt.volkel@cern.ch>
1 parent 7f48808 commit ed7cc86

File tree

5 files changed

+139
-7
lines changed

5 files changed

+139
-7
lines changed

MC/run/ANCHOR/anchorMC.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,8 @@ if [[ -z "${DISABLE_QC}" && "${MCRC}" = "0" && "${remainingargs}" == *"--include
241241
# do QC tasks
242242
echo "Doing QC"
243243
${O2DPG_ROOT}/MC/bin/o2_dpg_workflow_runner.py -f workflow.json --target-labels QC --cpu-limit ${ALIEN_JDL_CPULIMIT:-8} -k
244+
# NOTE that with the -k|--keep-going option, the runner will try to keep on executing even if some tasks fail.
245+
# That means, even if there is a failing QC task, the return code will be 0
244246
MCRC=$?
245247
fi
246248

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
#!/bin/bash
2+
3+
#
4+
# An example steering script for anchored MC simulations, PbPb
5+
#
6+
7+
# example anchoring
8+
# taken from https://its.cern.ch/jira/browse/O2-4586
9+
export ALIEN_JDL_LPMANCHORPASSNAME=apass2
10+
export ALIEN_JDL_MCANCHOR=apass2
11+
export ALIEN_JDL_COLLISIONSYSTEM=Pb-Pb
12+
export ALIEN_JDL_CPULIMIT=8
13+
export ALIEN_JDL_LPMPASSNAME=apass2
14+
export ALIEN_JDL_LPMRUNNUMBER=544121
15+
export ALIEN_JDL_LPMPRODUCTIONTYPE=MC
16+
export ALIEN_JDL_LPMINTERACTIONTYPE=PbPb
17+
export ALIEN_JDL_LPMPRODUCTIONTAG=LHC24a1
18+
export ALIEN_JDL_LPMANCHORRUN=544121
19+
export ALIEN_JDL_LPMANCHORPRODUCTION=LHC23zzh
20+
export ALIEN_JDL_LPMANCHORYEAR=2023
21+
22+
export NTIMEFRAMES=2
23+
export NSIGEVENTS=2
24+
export SPLITID=100
25+
export PRODSPLIT=153
26+
export CYCLE=0
27+
28+
# On the GRID this is set automatically; for our use case, we can mimic any job ID
29+
export ALIEN_PROC_ID=2963436952
30+
31+
# run the central anchor steering script; this includes
32+
# * derive timestamp
33+
# * derive interaction rate
34+
# * extract and prepare configurations (which detectors are contained in the run etc.)
35+
# * run the simulation (and QC)
36+
${O2DPG_ROOT}/MC/run/ANCHOR/anchorMC.sh
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
#!/bin/bash
2+
3+
#
4+
# An example steering script for anchored MC simulations, pp
5+
#
6+
7+
# example anchoring
8+
# taken from https://its.cern.ch/jira/browse/O2-4586
9+
export ALIEN_JDL_LPMANCHORPASSNAME=apass2
10+
export ALIEN_JDL_MCANCHOR=apass2
11+
export ALIEN_JDL_COLLISIONSYSTEM=p-p
12+
export ALIEN_JDL_CPULIMIT=8
13+
export ALIEN_JDL_LPMPASSNAME=apass2
14+
export ALIEN_JDL_LPMRUNNUMBER=535069
15+
export ALIEN_JDL_LPMPRODUCTIONTYPE=MC
16+
export ALIEN_JDL_LPMINTERACTIONTYPE=pp
17+
export ALIEN_JDL_LPMPRODUCTIONTAG=LHC24a2
18+
export ALIEN_JDL_LPMANCHORRUN=535069
19+
export ALIEN_JDL_LPMANCHORPRODUCTION=LHC23f
20+
export ALIEN_JDL_LPMANCHORYEAR=2023
21+
22+
export NTIMEFRAMES=2
23+
export NSIGEVENTS=50
24+
export SPLITID=100
25+
export PRODSPLIT=153
26+
export CYCLE=0
27+
28+
# On the GRID this is set automatically; for our use case, we can mimic any job ID
29+
export ALIEN_PROC_ID=2963436952
30+
31+
# for pp and 50 events per TF, we launch only 4 workers.
32+
export NWORKERS=4
33+
34+
# run the central anchor steering script; this includes
35+
# * derive timestamp
36+
# * derive interaction rate
37+
# * extract and prepare configurations (which detectors are contained in the run etc.)
38+
# * run the simulation (and QC)
39+
${O2DPG_ROOT}/MC/run/ANCHOR/anchorMC.sh

test/common/utils/utils.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ make_wf_creation_script()
7878
print_error_logs()
7979
{
8080
local search_dir=${1}
81-
local search_pattern="TASK-EXIT-CODE: ([1-9][0-9]*)|[Ss]egmentation violation|[Ee]xception caught|\[FATAL\]|uncaught exception|\(int\) ([1-9][0-9]*)|fair::FatalException"
81+
local search_pattern="TASK-EXIT-CODE: ([1-9][0-9]*)|[Ss]egmentation violation|[Ss]egmentation fault|Program crashed|[Ee]xception caught|\[FATAL\]|uncaught exception|\(int\) ([1-9][0-9]*)|fair::FatalException"
8282
local error_files=$(find ${search_dir} -maxdepth 4 -type f \( -name "*.log" -or -name "*serverlog*" -or -name "*workerlog*" -or -name "*mergerlog*" \) | xargs grep -l -E "${search_pattern}" | sort)
8383
for ef in ${error_files} ; do
8484
echo_red "Error found in log $(realpath ${ef})"

test/run_workflow_tests.sh

Lines changed: 61 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,14 @@
33
# The test parent dir to be created in current directory
44
TEST_PARENT_DIR_PWG="o2dpg_tests/workflows_pwgs"
55
TEST_PARENT_DIR_BIN="o2dpg_tests/workflows_bin"
6+
TEST_PARENT_DIR_ANCHORED="o2dpg_tests/anchored"
67

78
# a global counter for tests
89
TEST_COUNTER=0
910

1011
# unified names of log files
1112
LOG_FILE_WF="o2dpg-test-wf.log"
13+
LOG_FILE_ANCHORED="o2dpg-test-anchored.log"
1214

1315
# Prepare some colored output
1416
SRED="\033[0;31m"
@@ -123,6 +125,26 @@ run_workflow_creation()
123125
return ${RET}
124126
}
125127

128+
# Run one or more anchored MC test scripts, each inside its own freshly created
# sub-directory of the current working directory.
# Globals:
#   O2DPG_ROOT        (read)    used to build the default test script path
#   TEST_COUNTER      (written) incremented once per script that is executed
#   LOG_FILE_ANCHORED (read)    name of the log file written inside each test directory
# Arguments:
#   $1 - whitespace-separated list of test scripts (optional);
#        defaults to the pp anchoring test under MC/run/ANCHOR/tests
# Outputs:
#   "Test <N>: <script>" (without trailing newline) per executed script,
#   plus a notice for every script that is skipped or whose directory cannot be prepared
# Returns:
#   0 if every executed script succeeded; otherwise the exit code of the last
#   failing script (or 1 if the last failure was a test directory that could not be set up)
test_anchored()
{
    local to_run="${1:-${O2DPG_ROOT}/MC/run/ANCHOR/tests/test_anchor_2023_apass2_pp.sh}"
    local RET=0
    # keep all helper variables local so nothing leaks into the calling script
    local anchored_script test_dir ret_this
    # deliberately unquoted: the argument is a whitespace-separated list of scripts
    for anchored_script in ${to_run} ; do
        if [[ ! -f "${anchored_script}" ]] ; then
            echo "Desired test script ${anchored_script} does not exist. Skip."
            continue
        fi
        ((TEST_COUNTER++))
        test_dir="${TEST_COUNTER}_$(basename "${anchored_script}")_dir"
        rm -rf "${test_dir}" 2> /dev/null
        # never run the test (or popd) if we could not enter a fresh directory;
        # otherwise the script would run in the wrong place and popd would
        # remove an entry of the caller's directory stack
        if ! mkdir "${test_dir}" || ! pushd "${test_dir}" > /dev/null ; then
            echo "Cannot prepare test directory ${test_dir} for ${anchored_script}."
            RET=1
            continue
        fi
        echo -n "Test ${TEST_COUNTER}: ${anchored_script}"
        "${anchored_script}" >> "${LOG_FILE_ANCHORED}" 2>&1
        ret_this=${?}
        [[ "${ret_this}" != "0" ]] && RET=${ret_this}
        popd > /dev/null
    done
    return ${RET}
}
147+
126148
collect_changed_pwg_wf_files()
127149
{
128150
# Collect all INI files which have changed
@@ -188,8 +210,9 @@ source ${REPO_DIR}/test/common/utils/utils.sh
188210
pushd ${REPO_DIR} > /dev/null
189211

190212
# flag if anything changed in the sim workflow bin dir
191-
changed_wf_bin=$(get_changed_files | grep "MC/bin")
213+
changed_wf_bin=$(get_changed_files | grep -E "MC/bin")
192214
changed_wf_bin_related=$(get_changed_files | grep -E "MC/analysis_testing|MC/config/analysis_testing/json|MC/config/QC/json")
215+
changed_anchored_related=$(get_changed_files | grep -E "MC/run/ANCHOR/anchorMC.sh|MC/run/ANCHOR/tests|MC/bin|UTILS/parse-async-WorkflowConfig.py")
193216

194217

195218
# collect what has changed for PWGs
@@ -215,6 +238,27 @@ REPO_DIR=$(realpath ${REPO_DIR})
215238
export O2DPG_ROOT=${REPO_DIR}
216239

217240

241+
###############
242+
# ANCHORED MC #
243+
###############
244+
# prepare our local test directory for anchored MC tests
245+
rm -rf ${TEST_PARENT_DIR_ANCHORED} 2>/dev/null
246+
mkdir -p ${TEST_PARENT_DIR_ANCHORED} 2>/dev/null
247+
pushd ${TEST_PARENT_DIR_ANCHORED} > /dev/null
248+
249+
# global return code for anchored MC tests
250+
ret_global_anchored=0
251+
if [[ "${changed_anchored_related}" != "" ]] ; then
252+
echo "### Test anchored ###"
253+
# Run an anchored test
254+
test_anchored
255+
ret_global_anchored=${?}
256+
echo
257+
fi
258+
259+
# return to where we came from
260+
popd > /dev/null
261+
218262
########
219263
# PWGs #
220264
########
@@ -226,7 +270,7 @@ pushd ${TEST_PARENT_DIR_PWG} > /dev/null
226270
# global return code for PWGs
227271
ret_global_pwg=0
228272
if [[ "${changed_wf_bin}" != "" ]] ; then
229-
# Run all the PWG related WF creations, hence overwrite what was collected by collect_changed_pwg_wf_files eal=rlier
273+
# Run all the PWG related WF creations, hence overwrite what was collected by collect_changed_pwg_wf_files earlier
230274
WF_FILES=$(get_all_workflows "MC/run/.*/")
231275
echo
232276
fi
@@ -240,7 +284,6 @@ if [[ "${WF_FILES}" != "" ]] ; then
240284
echo
241285
fi
242286

243-
244287
# return to where we came from
245288
popd > /dev/null
246289

@@ -285,9 +328,21 @@ if [[ "${ret_global_bin}" != "0" ]] ; then
285328
echo "###################################"
286329
echo
287330
print_error_logs ${TEST_PARENT_DIR_BIN}
288-
exit ${ret_global_bin}
289331
fi
290332

333+
# However, if a central test fails, exit code will be !=0
334+
if [[ "${ret_global_anchored}" != "0" ]] ; then
335+
echo
336+
echo "##########################"
337+
echo "# ERROR for anchored MCs #"
338+
echo "##########################"
339+
echo
340+
print_error_logs ${TEST_PARENT_DIR_ANCHORED}
341+
fi
342+
343+
# Combine both results into the final exit status. The shell reports exit
# statuses modulo 256, so a plain sum such as 128 + 128 would be seen as
# success by the caller; clamp the sum so that any failure stays visible.
RET=$(( ret_global_bin + ret_global_anchored ))
if (( RET > 255 )) ; then
    RET=255
fi
344+
291345
echo
292-
echo_green "All required workflow tests successful"
293-
echo
346+
# Print the final verdict. An explicit if/else is used instead of
# "A && B || C" so that the success message can never be printed as a
# fallback when the error branch was taken.
if [[ "${RET}" != "0" ]] ; then
    echo "There were errors, please check!"
else
    echo_green "All required workflow tests successful"
fi
347+
348+
exit ${RET}

0 commit comments

Comments
 (0)