Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 11 additions & 5 deletions Lib/tarfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -830,16 +830,22 @@ def _get_filtered_attrs(member, dest_path, for_data=True):
if member.islnk() or member.issym():
if os.path.isabs(member.linkname):
raise AbsoluteLinkError(member)
# A link member that resolves to the destination directory itself
# would replace it with a (sym)link, redirecting the destination
# for all subsequent members.
if target_path == dest_path:
raise OutsideDestinationError(member, target_path)
normalized = os.path.normpath(member.linkname)
if normalized != member.linkname:
new_attrs['linkname'] = normalized
if member.issym():
target_path = os.path.join(dest_path,
os.path.dirname(name),
member.linkname)
# The symlink is created at `name` with trailing separators
# stripped, so its target is relative to the directory
# containing that path.
link_dir = os.path.dirname(name.rstrip('/' + os.sep))
target_path = os.path.join(dest_path, link_dir, normalized)
else:
target_path = os.path.join(dest_path,
member.linkname)
target_path = os.path.join(dest_path, normalized)
target_path = os.path.realpath(target_path,
strict=os.path.ALLOW_MISSING)
if os.path.commonpath([target_path, dest_path]) != dest_path:
Expand Down
151 changes: 117 additions & 34 deletions Lib/test/test_tarfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -3682,6 +3682,39 @@ class TestExtractionFilters(unittest.TestCase):
# The destination for the extraction, within `outerdir`
destdir = outerdir / 'dest'

@classmethod
def setUpClass(cls):
# Posix and Windows have different pathname resolution:
# either symlink or a '..' component resolve first.
# Let's see which we are on.
if os_helper.can_symlink():
testpath = os.path.join(TEMPDIR, 'resolution_test')
os.mkdir(testpath)

# testpath/current links to `.` which is all of:
# - `testpath`
# - `testpath/current`
# - `testpath/current/current`
# - etc.
os.symlink('.', os.path.join(testpath, 'current'))

# we'll test where `testpath/current/../file` ends up
with open(os.path.join(testpath, 'current', '..', 'file'), 'w'):
pass

if os.path.exists(os.path.join(testpath, 'file')):
# Windows collapses 'current\..' to '.' first, leaving
# 'testpath\file'
cls.dotdot_resolves_early = True
elif os.path.exists(os.path.join(testpath, '..', 'file')):
# Posix resolves 'current' to '.' first, leaving
# 'testpath/../file'
cls.dotdot_resolves_early = False
else:
raise AssertionError('Could not determine link resolution')
else:
cls.dotdot_resolves_early = False

@contextmanager
def check_context(self, tar, filter, *, check_flag=True):
"""Extracts `tar` to `self.destdir` and allows checking the result
Expand Down Expand Up @@ -3853,10 +3886,19 @@ def test_parent_symlink(self):
+ "which is outside the destination")

with self.check_context(arc.open(), 'data'):
self.expect_exception(
tarfile.LinkOutsideDestinationError,
"""'parent' would link to ['"].*outerdir['"], """
+ "which is outside the destination")
if self.dotdot_resolves_early:
# 'current/../..' normalises to '..', which is rejected.
self.expect_exception(
tarfile.LinkOutsideDestinationError,
"""'parent' would link to ['"].*outerdir['"], """
+ "which is outside the destination")
else:
# 'current/..' normalises to '.'; the rewritten link is
# created and 'parent/evil' lands harmlessly inside the
# destination.
self.expect_file('current', symlink_to='.')
self.expect_file('parent', symlink_to='.')
self.expect_file('evil')

else:
# No symlink support. The symlinks are ignored.
Expand Down Expand Up @@ -3946,35 +3988,6 @@ def test_parent_symlink2(self):
# Test interplaying symlinks
# Inspired by 'dirsymlink2b' in jwilk/traversal-archives

# Posix and Windows have different pathname resolution:
# either symlink or a '..' component resolve first.
# Let's see which we are on.
if os_helper.can_symlink():
testpath = os.path.join(TEMPDIR, 'resolution_test')
os.mkdir(testpath)

# testpath/current links to `.` which is all of:
# - `testpath`
# - `testpath/current`
# - `testpath/current/current`
# - etc.
os.symlink('.', os.path.join(testpath, 'current'))

# we'll test where `testpath/current/../file` ends up
with open(os.path.join(testpath, 'current', '..', 'file'), 'w'):
pass

if os.path.exists(os.path.join(testpath, 'file')):
# Windows collapses 'current\..' to '.' first, leaving
# 'testpath\file'
dotdot_resolves_early = True
elif os.path.exists(os.path.join(testpath, '..', 'file')):
# Posix resolves 'current' to '.' first, leaving
# 'testpath/../file'
dotdot_resolves_early = False
else:
raise AssertionError('Could not determine link resolution')

with ArchiveMaker() as arc:

# `current` links to `.` which is both the destination directory
Expand Down Expand Up @@ -4010,7 +4023,7 @@ def test_parent_symlink2(self):

with self.check_context(arc.open(), 'data'):
if os_helper.can_symlink():
if dotdot_resolves_early:
if self.dotdot_resolves_early:
# Fail when extracting a file outside destination
self.expect_exception(
tarfile.OutsideDestinationError,
Expand Down Expand Up @@ -4130,6 +4143,76 @@ def test_sly_relative2(self):
+ """['"].*moo['"], which is outside the """
+ "destination")

@symlink_test
@os_helper.skip_unless_symlink
def test_normpath_realpath_mismatch(self):
# The link-target check must validate the value that will actually
# be written to disk (the normalised linkname), not the original.
# Here 'a' is a symlink to a deep nonexistent path, so realpath()
# of 'a/../../...' stays inside the destination while normpath()
# collapses 'a/..' lexically and escapes.
depth = len(self.destdir.parts) + 5
deep = '/'.join(f'p{i}' for i in range(depth))
sneaky = 'a/' + '../' * depth + 'flag'
for kind in 'symlink_to', 'hardlink_to':
with self.subTest(kind):
with ArchiveMaker() as arc:
arc.add('a', symlink_to=deep)
arc.add('escape', **{kind: sneaky})
with self.check_context(arc.open(), 'data'):
self.expect_exception(
tarfile.LinkOutsideDestinationError)

@symlink_test
@os_helper.skip_unless_symlink
def test_symlink_trailing_slash(self):
# A trailing slash on a symlink member's name must not cause the
# link target to be resolved relative to the wrong directory.
with ArchiveMaker() as arc:
t = tarfile.TarInfo('x/')
t.type = tarfile.SYMTYPE
t.linkname = '..'
arc.tar_w.addfile(t)
arc.add('x/escaped', content='hi')

with self.check_context(arc.open(), 'data'):
self.expect_exception(tarfile.LinkOutsideDestinationError)

@symlink_test
@os_helper.skip_unless_symlink
def test_link_at_destination(self):
# A link member whose name resolves to the destination directory
# itself must be rejected: otherwise the destination is replaced
# by a symlink and later members can be redirected through it.
for name in '', '.', './':
with ArchiveMaker() as arc:
t = tarfile.TarInfo(name)
t.type = tarfile.SYMTYPE
t.linkname = '.'
arc.tar_w.addfile(t)

with self.check_context(arc.open(), 'data'):
self.expect_exception(tarfile.OutsideDestinationError)

@symlink_test
@os_helper.skip_unless_symlink
def test_empty_name_symlink_chain(self):
# Regression test for a chain of empty-named symlinks that
# incrementally redirects the destination outwards.
with ArchiveMaker() as arc:
for name, target in [('', ''), ('a/', '..'),
('', 'dummy'), ('', 'a'),
('b/', '..'),
('', 'dummy'), ('', 'a/b')]:
t = tarfile.TarInfo(name)
t.type = tarfile.SYMTYPE
t.linkname = target
arc.tar_w.addfile(t)
arc.add('escaped', content='hi')

with self.check_context(arc.open(), 'data'):
self.expect_exception(tarfile.FilterError)

@symlink_test
def test_deep_symlink(self):
# Test that symlinks and hardlinks inside a directory
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
:func:`tarfile.data_filter` now validates link targets using the same
normalised value that is written to disk, strips trailing separators from
the member name when resolving a symlink's directory, and rejects link
members that would replace the destination directory itself. This closes
several path-traversal bypasses of the ``data`` extraction filter.
Loading