Retry select() on EINTR in ctx_notify_send_recv_events(); name unexpected CM events

What this patch does
  * src/perftest_communication.c, rdma_client_connect():
    when the CM event is not RDMA_CM_EVENT_ESTABLISHED, print the event
    name via rdma_event_str() together with event->status instead of the
    raw event number.
  * src/perftest_resources.c:
    define the flag g_sigalarm_fired and set it to 1 on entry to the
    catch_alarm() and check_alive() handlers.
  * src/perftest_resources.h:
    declare g_sigalarm_fired and make ctx_notify_send_recv_events()
    call select() again for as long as it fails with EINTR, printing
    whether the flag was set by one of the two handlers above.

Review changes relative to the submitted patch
  * errno is copied into a local right after select() returns. The
    submitted loop tested errno after calling fprintf(), which is allowed
    to modify errno, so an interrupted select() could leave the retry loop
    and be reported as a failure with an unrelated error string.
  * The diagnostic named the signal "SIGALARM"; the signal is SIGALRM.
  * The whitespace-only rewrite of "return FAILURE;" in
    rdma_client_connect() is dropped (the line is context again).
  * The line count in the last hunk header is updated for the lines above.

Caveats for whoever applies this
  * This copy of the patch had been flattened onto two lines. Indentation
    and blank context lines are reconstructed (blank lines from the hunk
    line counts, indentation assumed to be tabs); verify against the tree
    or apply with whitespace checks relaxed.
  * The header names on the six #include lines of the first
    perftest_resources.h hunk were lost in this copy and are left blank.
    The added code uses errno/EINTR, strerror() and sig_atomic_t, so the
    three added headers are presumably <errno.h>, <string.h> and
    <signal.h> (TODO: confirm against the original commit, including the
    three context lines).
  * The index lines are carried over unchanged; the post-image ids of
    perftest_communication.c and perftest_resources.h no longer match the
    edited content.

diff --git a/src/perftest_communication.c b/src/perftest_communication.c
index 5c89dd0f..6384811e 100755
--- a/src/perftest_communication.c
+++ b/src/perftest_communication.c
@@ -1125,9 +1125,10 @@ int rdma_client_connect(struct pingpong_context *ctx,struct perftest_parameters
 	}
 
 	if (event->event != RDMA_CM_EVENT_ESTABLISHED) {
-		fprintf(stderr, "Unexpected CM event bl blka %d\n", event->event);
+		fprintf(stderr, "Unexpected CM event bl blka %s; error: %d.\n",
+			rdma_event_str(event->event), event->status);
 		rdma_ack_cm_event(event);
 		return FAILURE;
 	}
 
 	if (user_param->connection_type == UD) {
diff --git a/src/perftest_resources.c b/src/perftest_resources.c
index 94e95864..a260290e 100755
--- a/src/perftest_resources.c
+++ b/src/perftest_resources.c
@@ -108,6 +108,7 @@ static __always_inline int poll_completions(
 
 struct perftest_parameters* duration_param;
 struct check_alive_data check_alive_data;
+volatile sig_atomic_t g_sigalarm_fired = 0;
 
 /******************************************************************************
  * Beginning
@@ -5859,6 +5860,7 @@ uint16_t ctx_get_local_lid(struct ibv_context *context,int port)
  ******************************************************************************/
 void catch_alarm(int sig)
 {
+	g_sigalarm_fired = 1;
 	switch (duration_param->state) {
 		case START_STATE:
 			duration_param->state = SAMPLE_STATE;
@@ -5886,6 +5888,7 @@ void catch_alarm(int sig)
 
 void check_alive(int sig)
 {
+	g_sigalarm_fired = 1;
 	if (check_alive_data.current_totrcnt > check_alive_data.last_totrcnt) {
 		check_alive_data.last_totrcnt = check_alive_data.current_totrcnt;
 		alarm(60);
diff --git a/src/perftest_resources.h b/src/perftest_resources.h
index 3bdc7952..5e2d894b 100644
--- a/src/perftest_resources.h
+++ b/src/perftest_resources.h
@@ -72,6 +72,9 @@
 #include
 #include
 #include
+#include
+#include
+#include
 #include "perftest_parameters.h"
 
 #define NUM_OF_RETRIES (10)
@@ -185,6 +188,7 @@ static inline uint64_t build_wr_id(uint32_t wr_index, uint16_t qp_index)
 	return ((uint64_t)wr_index) | ((uint64_t)qp_index << WR_ID_QP_INDEX_OFFSET);
 }
 
+extern volatile sig_atomic_t g_sigalarm_fired;
 /******************************************************************************
  * Perftest resources Structures and data types.
  ******************************************************************************/
@@ -865,15 +869,34 @@ static __inline void increase_rem_addr(struct ibv_send_wr *wr,int size,uint64_t
 static __inline int ctx_notify_send_recv_events(struct pingpong_context *ctx)
 {
 	fd_set rfds;
+	int ret;
+	int select_errno;
 
-	FD_ZERO(&rfds);
-	FD_SET(ctx->recv_channel->fd, &rfds);
-	FD_SET(ctx->send_channel->fd, &rfds);
+	do {
+		FD_ZERO(&rfds);
+		FD_SET(ctx->recv_channel->fd, &rfds);
+		FD_SET(ctx->send_channel->fd, &rfds);
 
-	if (select(MAX(ctx->recv_channel->fd,
-		       ctx->send_channel->fd) + 1,
-		   &rfds, NULL, NULL, NULL) == -1) {
-		fprintf(stderr, "Failed to get completion events\n");
+		g_sigalarm_fired = 0;
+
+		ret = select(MAX(ctx->recv_channel->fd,
+				 ctx->send_channel->fd) + 1,
+			     &rfds, NULL, NULL, NULL);
+		/* Capture errno now: the fprintf() calls below may overwrite it. */
+		select_errno = errno;
+
+		if (ret == -1 && select_errno == EINTR) {
+			if (g_sigalarm_fired) {
+				fprintf(stderr, "Confirmed: select() was interrupted by SIGALRM. Retrying...\n");
+			} else {
+				fprintf(stderr, "Warning: select() interrupted by another signal. Retrying...\n");
+			}
+		}
+
+	} while (ret == -1 && select_errno == EINTR);
+
+	if (ret == -1) {
+		fprintf(stderr, "Failed to get completion events: %s\n", strerror(select_errno));
 		return FAILURE;
 	}
 