diff --git a/changes/394.fixed b/changes/394.fixed new file mode 100644 index 00000000..20126908 --- /dev/null +++ b/changes/394.fixed @@ -0,0 +1,2 @@ +Fixed Junos reboots not being detected when waiting for the device to reload. +Increased the default Junos reboot wait timeout from 1 hour to 2 hours. diff --git a/pyntc/devices/jnpr_device.py b/pyntc/devices/jnpr_device.py index 70c8fcad..b6431204 100644 --- a/pyntc/devices/jnpr_device.py +++ b/pyntc/devices/jnpr_device.py @@ -228,18 +228,53 @@ def _uptime_to_string(self, uptime_full_string): days, hours, minutes, seconds = self._uptime_components(uptime_full_string) return f"{days:02d}:{hours:02d}:{minutes:02d}:{seconds:02d}" - def _wait_for_device_reboot(self, timeout=3600): + def _wait_for_device_reboot(self, original_uptime, timeout=7200): + """Block until the device reboots and accepts a fresh connection. + + Drops the existing NETCONF session and polls for the device to come back. + The reboot is considered complete when a new connection succeeds and the + device reports an uptime lower than ``original_uptime`` (i.e., it has + booted since the reboot was issued). + + The pre-reboot session must be discarded first: once the device restarts, + PyEZ still reports it as connected even though the transport is dead, so it + is closed here to force each probe to establish a fresh connection. + + Args: + original_uptime (int): Device uptime in seconds captured before the reboot. + timeout (int, optional): Max seconds to wait for the device to return. Defaults to 2 hours. + """ start = time.time() - disconnected = False + + # Drop the pre-reboot NETCONF session so subsequent probes can't read from + # a stale connection PyEZ still reports as connected. + try: + self.close() + except Exception as close_exc: # pylint: disable=broad-exception-caught + log.debug("Host %s: Pre-reboot disconnect raised %s (ignored).", self.host, close_exc) + while time.time() - start < timeout: - if disconnected: - try: - self.open() + try: + self.open() + self._uptime = None + current_uptime = self.uptime + if current_uptime is not None and current_uptime < original_uptime: + log.info( + "Host %s: Device rebooted (uptime %ss < pre-reboot %ss).", + self.host, + current_uptime, + original_uptime, + ) return - except: # noqa E722 # nosec # pylint: disable=bare-except - pass - elif not self.connected: - disconnected = True + log.debug( + "Host %s: Reachable but uptime %ss >= pre-reboot %ss; still waiting.", + self.host, + current_uptime, + original_uptime, + ) + except Exception as exc: # pylint: disable=broad-exception-caught + log.debug("Host %s: Reboot probe failed (%s); will retry.", self.host, exc) + self.native.connected = False time.sleep(10) raise RebootTimeoutError(hostname=self.hostname, wait_time=timeout) @@ -318,12 +353,14 @@ def uptime(self): Returns: (int): Device uptime in seconds. """ - try: - native_uptime_string = self.native.facts["RE0"]["up_time"] - except (AttributeError, TypeError): - native_uptime_string = None - if self._uptime is None: + try: + # Bust PyEZ's cached facts so a cold cache always reflects the live device. + self.native.facts_refresh(keys="RE0") + native_uptime_string = self.native.facts["RE0"]["up_time"] + except (AttributeError, TypeError, KeyError): + native_uptime_string = None + if native_uptime_string is not None: self._uptime = self._uptime_to_seconds(native_uptime_string) @@ -337,13 +374,16 @@ def uptime_string(self): Returns: (str): Device uptime. """ - try: - native_uptime_string = self.native.facts["RE0"]["up_time"] - except (AttributeError, TypeError): - native_uptime_string = None - if self._uptime_string is None: - self._uptime_string = self._uptime_to_string(native_uptime_string) + try: + # Bust PyEZ's cached facts so a cold cache always reflects the live device. + self.native.facts_refresh(keys="RE0") + native_uptime_string = self.native.facts["RE0"]["up_time"] + except (AttributeError, TypeError, KeyError): + native_uptime_string = None + + if native_uptime_string is not None: + self._uptime_string = self._uptime_to_string(native_uptime_string) return self._uptime_string @@ -505,13 +545,13 @@ def open(self): if not self.connected: self.native.open() - def reboot(self, wait_for_reload=False, timeout=3600, confirm=None): + def reboot(self, wait_for_reload=False, timeout=7200, confirm=None): """ Reload the controller or controller pair. Args: wait_for_reload (bool): Whether the reboot method should wait for the device to come back up before returning. Defaults to False. - timeout (int, optional): Time in seconds to wait for the device to return after reboot. Defaults to 1 hour. + timeout (int, optional): Time in seconds to wait for the device to return after reboot. Defaults to 2 hours. confirm (None): Not used. Deprecated since v0.17.0. Example: @@ -522,9 +562,16 @@ def reboot(self, wait_for_reload=False, timeout=3600, confirm=None): if confirm is not None: warnings.warn("Passing 'confirm' to reboot method is deprecated.", DeprecationWarning) + self._uptime = None + original_uptime = self.uptime + if original_uptime is None: + raise CommandError( + command="reboot", + message="Could not determine pre-reboot uptime; refusing to wait for reload.", + ) self.sw.reboot(in_min=0) if wait_for_reload: - self._wait_for_device_reboot(timeout=timeout) + self._wait_for_device_reboot(original_uptime, timeout=timeout) def rollback(self, filename): """Rollback to a specific configuration file. diff --git a/tests/unit/test_devices/test_jnpr_device.py b/tests/unit/test_devices/test_jnpr_device.py index 5b9d325d..28271a64 100644 --- a/tests/unit/test_devices/test_jnpr_device.py +++ b/tests/unit/test_devices/test_jnpr_device.py @@ -244,18 +244,28 @@ def test_reboot(self): @mock.patch("pyntc.devices.jnpr_device.time.sleep") def test_wait_for_device_to_reboot(self, mock_sleep): - with mock.patch.object(self.device, "open") as mock_open: - # Emulate the device disconnected and reconnecting - type(self.device.native).connected = mock.PropertyMock(side_effect=[True, False, True]) - mock_open.side_effect = [Exception, Exception, True] - self.device.reboot(wait_for_reload=True, timeout=3) - mock_open.assert_has_calls([mock.call()] * 3) + """Reboot completes when the device returns with a lower uptime than the baseline.""" + with ( + mock.patch.object(self.device, "open") as mock_open, + mock.patch.object(self.device, "close"), + mock.patch.object(type(self.device), "uptime", new_callable=mock.PropertyMock) as mock_uptime, + ): + # First read is the pre-reboot baseline; second is the post-reboot uptime. + mock_uptime.side_effect = [455, 30] + self.device.reboot(wait_for_reload=True, timeout=30) + + self.device.sw.reboot.assert_called_with(in_min=0) + mock_open.assert_called() @mock.patch("pyntc.devices.jnpr_device.time.sleep") def test_wait_for_device_to_reboot_error(self, mock_sleep): - with mock.patch.object(self.device, "open") as mock_open: - type(self.device.native).connected = mock.PropertyMock(side_effect=[True, False]) - mock_open.side_effect = Exception + """Raise RebootTimeoutError when the device never reports a lower uptime within the timeout.""" + with ( + mock.patch.object(self.device, "open"), + mock.patch.object(self.device, "close"), + mock.patch.object(type(self.device), "uptime", new_callable=mock.PropertyMock) as mock_uptime, + ): + mock_uptime.return_value = 455 with pytest.raises(RebootTimeoutError): self.device.reboot(wait_for_reload=True, timeout=1) @@ -290,12 +300,62 @@ def test_checkpoint(self, mock_scp): self.device.show.assert_called_with("show config") def test_uptime(self): + """Cold cache (_uptime is None) refreshes facts and parses the uptime.""" + self.assertIsNone(self.device._uptime) + uptime = self.device.uptime + self.assertEqual(uptime, 455) + self.device.native.facts_refresh.assert_called_once_with(keys="RE0") + + def test_uptime_cached(self): + """A populated cache is returned as-is, with no device round-trip.""" + self.device._uptime = 1234 uptime = self.device.uptime - assert uptime == 455 + self.assertEqual(uptime, 1234) + self.device.native.facts_refresh.assert_not_called() + + def test_uptime_refreshes_after_cache_cleared(self): + """Clearing the cache forces a fresh read.""" + self.assertEqual(self.device.uptime, 455) + + self.device._uptime = None + self.device.native.facts = {"RE0": {"up_time": "30 seconds"}} + + self.assertEqual(self.device.uptime, 30) + + def test_uptime_none_when_facts_unavailable(self): + """Missing/unavailable facts return None gracefully instead of raising an Exception.""" + self.device._uptime = None + self.device.native.facts = {} + self.assertIsNone(self.device.uptime) def test_uptime_string(self): + """Cold cache (_uptime_string is None) refreshes facts and formats the uptime.""" + self.assertIsNone(self.device._uptime_string) + uptime_string = self.device.uptime_string + self.assertEqual(uptime_string, "00:00:07:35") + self.device.native.facts_refresh.assert_called_once_with(keys="RE0") + + def test_uptime_string_cached(self): + """A populated cache is returned as-is, with no device round-trip.""" + self.device._uptime_string = "01:02:03:04" uptime_string = self.device.uptime_string - assert uptime_string == "00:00:07:35" + self.assertEqual(uptime_string, "01:02:03:04") + self.device.native.facts_refresh.assert_not_called() + + def test_uptime_string_refreshes_after_cache_cleared(self): + """Clearing the cache forces a fresh read.""" + self.assertEqual(self.device.uptime_string, "00:00:07:35") + + self.device._uptime_string = None + self.device.native.facts = {"RE0": {"up_time": "30 seconds"}} + + self.assertEqual(self.device.uptime_string, "00:00:00:30") + + def test_uptime_string_none_when_facts_unavailable(self): + """Missing/unavailable facts return None gracefully instead of raising an Exception.""" + self.device._uptime_string = None + self.device.native.facts = {} + self.assertIsNone(self.device.uptime_string) def test_vendor(self): vendor = self.device.vendor