From 5186da2b9bd1b240e47dffa1bf538835d8c2dd22 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Heath=20Dutton=F0=9F=95=B4=EF=B8=8F?=
Date: Wed, 24 Dec 2025 15:13:31 -0500
Subject: [PATCH] gh-141707: Fix tarfile type corruption with GNU long names

When processing GNU long name headers, the name field in the second
header contains garbage data. The V7 directory detection logic in
frombuf() would incorrectly mark regular files as directories if this
garbage ended with '/'.

Re-apply the detection after patching the actual long name to correct
any type corruption.
---
 Lib/tarfile.py                                             | 6 ++++++
 .../Library/2025-12-24-15-28-43.gh-issue-141707.8vN2Kp.rst | 1 +
 2 files changed, 7 insertions(+)
 create mode 100644 Misc/NEWS.d/next/Library/2025-12-24-15-28-43.gh-issue-141707.8vN2Kp.rst

diff --git a/Lib/tarfile.py b/Lib/tarfile.py
index 7db3a40c9b33cf..6fa9be0f769bc6 100644
--- a/Lib/tarfile.py
+++ b/Lib/tarfile.py
@@ -1410,6 +1410,12 @@ def _proc_gnulong(self, tarfile):
         next.offset = self.offset
         if self.type == GNUTYPE_LONGNAME:
             next.name = nts(buf, tarfile.encoding, tarfile.errors)
+            # V7 directory detection in frombuf() may have used garbage
+            # name data. Re-apply it with the actual long name.
+            if next.type == DIRTYPE and not next.name.endswith("/"):
+                next.type = AREGTYPE
+            elif next.type == AREGTYPE and next.name.endswith("/"):
+                next.type = DIRTYPE
         elif self.type == GNUTYPE_LONGLINK:
             next.linkname = nts(buf, tarfile.encoding, tarfile.errors)
 
diff --git a/Misc/NEWS.d/next/Library/2025-12-24-15-28-43.gh-issue-141707.8vN2Kp.rst b/Misc/NEWS.d/next/Library/2025-12-24-15-28-43.gh-issue-141707.8vN2Kp.rst
new file mode 100644
index 00000000000000..3dbd2db8497fbb
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-12-24-15-28-43.gh-issue-141707.8vN2Kp.rst
@@ -0,0 +1 @@
+Fix :mod:`tarfile` type corruption when extracting files with GNU long names. Regular files were incorrectly identified as directories if the header's name field ended with '/'.