Skip to content

Commit df17886

Browse files
authored
Dump stacktrace on timeout (#5367)
This simplifies the gymnastics required to get a stack trace. Notice that, in particular, the current cmd & wait approach could lead to zombie processes which are able to escape the parent shell and stay hung in Docker.
1 parent 5b7c2d4 commit df17886

File tree

1 file changed

+3
-36
lines changed

1 file changed

+3
-36
lines changed

tests/tests-wrapper.sh.in

Lines changed: 3 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -77,22 +77,12 @@ banner "/Environment"
7777
# Do we have timeout?
7878
TIMEOUT_EXEC=timeout
7979
TIMEOUT_CMD=
80-
TIMEOUT_PSTACK=
8180
type $TIMEOUT_EXEC &>/dev/null || TIMEOUT_EXEC=gtimeout
8281
type $TIMEOUT_EXEC &>/dev/null || TIMEOUT_EXEC=
8382
if [[ $TIMEOUT_EXEC && $TIMEOUT ]]; then
8483
# Kill with 15; if after 10 seconds it's still alive, send 9
85-
TIMEOUT_CMD="$TIMEOUT_EXEC --signal=SIGTERM --kill-after=10s ${TIMEOUT}s"
84+
TIMEOUT_CMD="$TIMEOUT_EXEC --signal=SIGABRT --kill-after=10s ${TIMEOUT}s"
8685
# Get a stack trace, if possible, shortly before sending the first SIGTERM
87-
if type pstack &>/dev/null; then
88-
export GDB=$(which gdb 2>/dev/null)
89-
TIMEOUT_PSTACK=$((TIMEOUT - 10))
90-
if [[ $TIMEOUT_PSTACK -lt 10 ]]; then
91-
TIMEOUT_PSTACK=10
92-
fi
93-
else
94-
banner "Will not get stack trace of processes timing out: pstack not found"
95-
fi
9686
fi
9787

9888
banner "Timeout prefix: $TIMEOUT_CMD"
@@ -101,7 +91,7 @@ CMD="$1"
10191
shift
10292
type "$CMD" &>/dev/null || CMD="@CMAKE_BINARY_DIR@/bin/$CMD"
10393

104-
for ((ATTEMPT = 1; ATTEMPT <= MAX_ATTEMPTS; ATTEMPT++)); do
94+
for ATTEMPT in `seq 1 $MAX_ATTEMPTS`; do
10595
DARGS=("$@")
10696
# Deduping args that contain a ":"
10797
N=${#DARGS[@]}
@@ -116,36 +106,13 @@ for ((ATTEMPT = 1; ATTEMPT <= MAX_ATTEMPTS; ATTEMPT++)); do
116106

117107
banner "Running $CMD with args ${DARGS[*]} (attempt $ATTEMPT/$MAX_ATTEMPTS)"
118108
ERR=0
119-
rm -f "${LOG}.bt"
120-
$TIMEOUT_CMD "$CMD" "${DARGS[@]}" &
121-
REAL_PID=$(pgrep -P$! 2>/dev/null || true)
122-
if [[ $REAL_PID && $TIMEOUT_PSTACK ]]; then
123-
banner "Process running as $REAL_PID for attempt $ATTEMPT"
124-
FINISHED=
125-
for ((I = 0; I < TIMEOUT_PSTACK; I++)); do
126-
if ! kill -0 $REAL_PID &>/dev/null; then
127-
FINISHED=1
128-
break
129-
fi
130-
sleep 1
131-
done
132-
if [[ ! $FINISHED ]]; then
133-
pstack $REAL_PID &>"${LOG}.bt" || true
134-
fi
135-
fi
136-
wait $! || ERR=$? # wait timeout process, not real PID
109+
SEGFAULT_SIGNALS=all LD_PRELOAD=libSegFault.so $TIMEOUT_CMD "$CMD" "${DARGS[@]}" || ERR=$?
137110
if [[ $ERR == 0 ]]; then
138111
banner "Test finished with success after $ATTEMPT attempts, exiting"
139112
mv "$LOG" "${LOG}.0"
140113
exit 0
141114
else
142115
banner "Test attempt $ATTEMPT/$MAX_ATTEMPTS failed with exit code $ERR"
143-
if [[ -e "${LOG}.bt" ]]; then
144-
banner "Stack trace follows for attempt $ATTEMPT"
145-
cat "${LOG}.bt"
146-
rm -f "${LOG}.bt"
147-
banner "End of stack trace for attempt $ATTEMPT"
148-
fi
149116
fi
150117
done
151118

0 commit comments

Comments
 (0)