From 39fc9f48e5ef55aad828d3195d34ea98c9f4324f Mon Sep 17 00:00:00 2001 From: Abhishek Bansal Date: Fri, 20 Feb 2026 22:27:42 +0530 Subject: [PATCH] MDEV-29252: Avoid masking specific IO thread errors with generic relay log write failure --- .../binlog_encryption/rpl_checksum.result | 2 +- .../binlog_encryption/rpl_corruption.result | 4 +-- mysql-test/suite/rpl/r/rpl_checksum.result | 2 +- mysql-test/suite/rpl/r/rpl_corruption.result | 4 +-- ..._semi_sync_slave_enabled_consistent.result | 2 +- mysql-test/suite/rpl/t/rpl_checksum.test | 8 ++--- mysql-test/suite/rpl/t/rpl_corruption.test | 8 ++--- ...pl_semi_sync_slave_enabled_consistent.test | 2 +- sql/slave.cc | 35 ++++++++++++------- 9 files changed, 37 insertions(+), 30 deletions(-) diff --git a/mysql-test/suite/binlog_encryption/rpl_checksum.result b/mysql-test/suite/binlog_encryption/rpl_checksum.result index 503f359f72644..5a756446bf8ca 100644 --- a/mysql-test/suite/binlog_encryption/rpl_checksum.result +++ b/mysql-test/suite/binlog_encryption/rpl_checksum.result @@ -109,7 +109,7 @@ connection slave; set @saved_dbug = @@global.debug_dbug; set @@global.debug_dbug='d,simulate_checksum_test_failure'; start slave io_thread; -include/wait_for_slave_io_error.inc [errno=1595,1913] +include/wait_for_slave_io_error.inc [errno=1743] set @@global.debug_dbug = @saved_dbug; start slave io_thread; include/wait_for_slave_param.inc [Read_Master_Log_Pos] diff --git a/mysql-test/suite/binlog_encryption/rpl_corruption.result b/mysql-test/suite/binlog_encryption/rpl_corruption.result index 7c60e15a567dc..277546cb5130c 100644 --- a/mysql-test/suite/binlog_encryption/rpl_corruption.result +++ b/mysql-test/suite/binlog_encryption/rpl_corruption.result @@ -36,7 +36,7 @@ SET GLOBAL master_verify_checksum=0; SET @@global.debug_dbug="d,corrupt_read_log_event2_set"; connection slave; START SLAVE IO_THREAD; -include/wait_for_slave_io_error.inc [errno=1595,1743] +include/wait_for_slave_io_error.inc [errno=1743] connection master; SET 
@@global.debug_dbug=@saved_dbug; SET GLOBAL master_verify_checksum=1; @@ -45,7 +45,7 @@ connection slave; SET @saved_dbug_slave = @@GLOBAL.debug_dbug; SET @@global.debug_dbug="d,corrupt_queue_event"; START SLAVE IO_THREAD; -include/wait_for_slave_io_error.inc [errno=1595,1743] +include/wait_for_slave_io_error.inc [errno=1743] SET @@global.debug_dbug=@saved_dbug_slave; # 6. Slave. Corruption in relay log SET @@global.debug_dbug="d,corrupt_read_log_event_char"; diff --git a/mysql-test/suite/rpl/r/rpl_checksum.result b/mysql-test/suite/rpl/r/rpl_checksum.result index 39ebb85b9e94a..e4ac5e2cff755 100644 --- a/mysql-test/suite/rpl/r/rpl_checksum.result +++ b/mysql-test/suite/rpl/r/rpl_checksum.result @@ -109,7 +109,7 @@ connection slave; set @saved_dbug = @@global.debug_dbug; set @@global.debug_dbug='d,simulate_checksum_test_failure'; start slave io_thread; -include/wait_for_slave_io_error.inc [errno=1595,1913] +include/wait_for_slave_io_error.inc [errno=1743] set @@global.debug_dbug = @saved_dbug; start slave io_thread; include/wait_for_slave_param.inc [Read_Master_Log_Pos] diff --git a/mysql-test/suite/rpl/r/rpl_corruption.result b/mysql-test/suite/rpl/r/rpl_corruption.result index 7c60e15a567dc..277546cb5130c 100644 --- a/mysql-test/suite/rpl/r/rpl_corruption.result +++ b/mysql-test/suite/rpl/r/rpl_corruption.result @@ -36,7 +36,7 @@ SET GLOBAL master_verify_checksum=0; SET @@global.debug_dbug="d,corrupt_read_log_event2_set"; connection slave; START SLAVE IO_THREAD; -include/wait_for_slave_io_error.inc [errno=1595,1743] +include/wait_for_slave_io_error.inc [errno=1743] connection master; SET @@global.debug_dbug=@saved_dbug; SET GLOBAL master_verify_checksum=1; @@ -45,7 +45,7 @@ connection slave; SET @saved_dbug_slave = @@GLOBAL.debug_dbug; SET @@global.debug_dbug="d,corrupt_queue_event"; START SLAVE IO_THREAD; -include/wait_for_slave_io_error.inc [errno=1595,1743] +include/wait_for_slave_io_error.inc [errno=1743] SET @@global.debug_dbug=@saved_dbug_slave; # 6. Slave. 
Corruption in relay log SET @@global.debug_dbug="d,corrupt_read_log_event_char"; diff --git a/mysql-test/suite/rpl/r/rpl_semi_sync_slave_enabled_consistent.result b/mysql-test/suite/rpl/r/rpl_semi_sync_slave_enabled_consistent.result index 4195acb931d74..edc4ac9df2325 100644 --- a/mysql-test/suite/rpl/r/rpl_semi_sync_slave_enabled_consistent.result +++ b/mysql-test/suite/rpl/r/rpl_semi_sync_slave_enabled_consistent.result @@ -23,7 +23,7 @@ set @@global.rpl_semi_sync_slave_enabled= OFF; set debug_sync='now signal io_thread_continue_read_event'; # Waiting for the slave to stop with the error from corrupt_queue_event connection slave; -include/wait_for_slave_io_error.inc [errno=1595,1743] +include/wait_for_slave_io_error.inc [errno=1743] # Sleep 1 to give time for Ack_receiver to receive COM_QUIT include/assert_grep.inc [Check that there is no 'Read semi-sync reply magic number error' in error log.] # diff --git a/mysql-test/suite/rpl/t/rpl_checksum.test b/mysql-test/suite/rpl/t/rpl_checksum.test index bea325b251f1a..2eec8e97a83ea 100644 --- a/mysql-test/suite/rpl/t/rpl_checksum.test +++ b/mysql-test/suite/rpl/t/rpl_checksum.test @@ -162,11 +162,9 @@ connection slave; set @saved_dbug = @@global.debug_dbug; set @@global.debug_dbug='d,simulate_checksum_test_failure'; start slave io_thread; -# When the checksum error is detected, the slave sets error code 1913 -# (ER_NETWORK_READ_EVENT_CHECKSUM_FAILURE) in queue_event(), then immediately -# sets error 1595 (ER_SLAVE_RELAY_LOG_WRITE_FAILURE) in handle_slave_io(). -# So we usually get 1595, but it is occasionally possible to get 1913. ---let $slave_io_errno= 1595,1913 +# When the checksum error is detected, the slave sets error code 1743 +# (ER_NETWORK_READ_EVENT_CHECKSUM_FAILURE) in queue_event(). 
+--let $slave_io_errno= 1743 --let $show_slave_io_error= 0 source include/wait_for_slave_io_error.inc; set @@global.debug_dbug = @saved_dbug; diff --git a/mysql-test/suite/rpl/t/rpl_corruption.test b/mysql-test/suite/rpl/t/rpl_corruption.test index cd2a1cc7ef207..504f76d9d6bad 100644 --- a/mysql-test/suite/rpl/t/rpl_corruption.test +++ b/mysql-test/suite/rpl/t/rpl_corruption.test @@ -130,10 +130,8 @@ SET @@global.debug_dbug="d,corrupt_read_log_event2_set"; --connection slave START SLAVE IO_THREAD; # When the checksum error is detected, the slave sets error code 1743 -# (ER_NETWORK_READ_EVENT_CHECKSUM_FAILURE) in queue_event(), then immediately -# sets error 1595 (ER_SLAVE_RELAY_LOG_WRITE_FAILURE) in handle_slave_io(). -# So we usually get 1595, but it is occasionally possible to get 1743. -let $slave_io_errno= 1595,1743; # ER_SLAVE_RELAY_LOG_WRITE_FAILURE, ER_NETWORK_READ_EVENT_CHECKSUM_FAILURE +# (ER_NETWORK_READ_EVENT_CHECKSUM_FAILURE) in queue_event(). +let $slave_io_errno= 1743; # ER_NETWORK_READ_EVENT_CHECKSUM_FAILURE --source include/wait_for_slave_io_error.inc --connection master SET @@global.debug_dbug=@saved_dbug; @@ -145,7 +143,7 @@ SET GLOBAL master_verify_checksum=1; SET @saved_dbug_slave = @@GLOBAL.debug_dbug; SET @@global.debug_dbug="d,corrupt_queue_event"; START SLAVE IO_THREAD; -let $slave_io_errno= 1595,1743; # ER_SLAVE_RELAY_LOG_WRITE_FAILURE, ER_NETWORK_READ_EVENT_CHECKSUM_FAILURE +let $slave_io_errno= 1743; # ER_NETWORK_READ_EVENT_CHECKSUM_FAILURE --source include/wait_for_slave_io_error.inc SET @@global.debug_dbug=@saved_dbug_slave; diff --git a/mysql-test/suite/rpl/t/rpl_semi_sync_slave_enabled_consistent.test b/mysql-test/suite/rpl/t/rpl_semi_sync_slave_enabled_consistent.test index ca7e788780c29..a143ec4f5b143 100644 --- a/mysql-test/suite/rpl/t/rpl_semi_sync_slave_enabled_consistent.test +++ b/mysql-test/suite/rpl/t/rpl_semi_sync_slave_enabled_consistent.test @@ -50,7 +50,7 @@ set debug_sync='now signal io_thread_continue_read_event'; 
--echo # Waiting for the slave to stop with the error from corrupt_queue_event --connection slave ---let $slave_io_errno= 1595,1743 +--let $slave_io_errno= 1743 --source include/wait_for_slave_io_error.inc --echo # Sleep 1 to give time for Ack_receiver to receive COM_QUIT diff --git a/sql/slave.cc b/sql/slave.cc index 893e24627bddc..8a4e943b7674e 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -5133,9 +5133,6 @@ Stopping slave I/O thread due to out-of-memory error from master"); if (queue_event(mi, event_buf, event_len)) { - mi->report(ERROR_LEVEL, ER_SLAVE_RELAY_LOG_WRITE_FAILURE, NULL, - ER_THD(thd, ER_SLAVE_RELAY_LOG_WRITE_FAILURE), - "could not queue event from master"); goto err; } @@ -6402,18 +6399,27 @@ static int queue_binlog_ver_3_event(Master_info *mi, const uchar *buf, static int queue_old_event(Master_info *mi, const uchar *buf, ulong event_len) { + int ret = 0; DBUG_ENTER("queue_old_event"); switch (mi->rli.relay_log.description_event_for_queue->binlog_version) { case 1: - DBUG_RETURN(queue_binlog_ver_1_event(mi,buf,event_len)); + ret= queue_binlog_ver_1_event(mi,buf,event_len); + break; case 3: - DBUG_RETURN(queue_binlog_ver_3_event(mi,buf,event_len)); + ret= queue_binlog_ver_3_event(mi,buf,event_len); + break; default: /* unsupported format; eg version 2 */ DBUG_PRINT("info",("unsupported binlog format %d in queue_old_event()", mi->rli.relay_log.description_event_for_queue->binlog_version)); - DBUG_RETURN(1); + ret= 1; + break; } + + if (ret == 1) + DBUG_RETURN(ER_SLAVE_RELAY_LOG_WRITE_FAILURE); + + DBUG_RETURN(ret); } /* @@ -6511,7 +6517,12 @@ static int queue_event(Master_info* mi, const uchar *buf, ulong event_len) if (mi->rli.relay_log.description_event_for_queue->binlog_version < 4 && buf[EVENT_TYPE_OFFSET] != FORMAT_DESCRIPTION_EVENT /* a way to escape */ && buf[EVENT_TYPE_OFFSET] != HEARTBEAT_LOG_EVENT) - DBUG_RETURN(queue_old_event(mi,buf,event_len)); + { + error= queue_old_event(mi, buf, event_len); + if (unlikely(error)) + goto err; + 
DBUG_RETURN(0); + } #ifdef ENABLED_DEBUG_SYNC /* @@ -7495,12 +7506,12 @@ static int queue_event(Master_info* mi, const uchar *buf, ulong event_len) DBUG_PRINT("info", ("error: %d", error)); /* - Do not print ER_SLAVE_RELAY_LOG_WRITE_FAILURE error here, as the caller - handle_slave_io() prints it on return. + Report all queuing errors here; the caller handle_slave_io() does not. */ - if (unlikely(error) && error != ER_SLAVE_RELAY_LOG_WRITE_FAILURE) - mi->report(ERROR_LEVEL, error, NULL, ER_DEFAULT(error), - error_msg.ptr()); + if (unlikely(error)) + mi->report(ERROR_LEVEL, error, NULL, ER_THD(mi->io_thd, error), + (error == ER_SLAVE_RELAY_LOG_WRITE_FAILURE && error_msg.is_empty()) ? + "could not queue event from master" : error_msg.ptr()); if (unlikely(is_malloc)) my_free((void *)new_buf);