Compare commits

...

10 Commits

Author SHA1 Message Date
openeuler-ci-bot
1baa553ada
!416 [sync] PR-411: fix CVE-2025-0938
From: @openeuler-sync-bot 
Reviewed-by: @dillon_chen 
Signed-off-by: @dillon_chen
2025-02-14 09:35:29 +00:00
Funda Wang
a47f733d82 fix CVE-2025-0938
(cherry picked from commit 66b5e6bdc273a880452d84fd878ad6f443ed53ff)
2025-02-14 15:19:32 +08:00
openeuler-ci-bot
58f5a43bf0
!398 完善加密算法测试用例
From: @no_coke 
Reviewed-by: @yangyuan32 
Signed-off-by: @yangyuan32
2024-12-11 02:00:11 +00:00
GuoCe
d49e65d4df support TLS_SM4 2024-12-10 18:41:12 +08:00
openeuler-ci-bot
193f5c463e
!392 fix CVE-2023-27043
From: @xinsheng3 
Reviewed-by: @xyncoder, @yangyuan32 
Signed-off-by: @yangyuan32
2024-11-25 09:32:56 +00:00
xinsheng3
bccfffae2e fix CVE-2023-27043 2024-11-25 15:58:47 +08:00
openeuler-ci-bot
40d12857ad
!388 [sync] PR-379: fix CVE-2024-9287
From: @openeuler-sync-bot 
Reviewed-by: @yangyuan32 
Signed-off-by: @yangyuan32
2024-11-22 09:12:05 +00:00
Funda Wang
6965d75829 fix CVE-2024-9287
(cherry picked from commit 6c109459febf92bfa092a481686c2a548c50e3d6)
2024-11-22 17:07:25 +08:00
openeuler-ci-bot
114b6155b1
!359 fix CVE-2024-6232,CVE-2024-3219,CVE-2024-0450,CVE-2023-6597
From: @xinsheng3 
Reviewed-by: @xyncoder, @gaoruoshu 
Signed-off-by: @gaoruoshu
2024-09-25 01:29:47 +00:00
xinsheng3
3cd17cd257 fix CVE-2024-6232,CVE-2024-3219,CVE-2024-0450,CVE-2023-6597 2024-09-24 19:26:24 +08:00
17 changed files with 2620 additions and 17 deletions

View File

@ -0,0 +1,24 @@
From f1b6f59e1883ea5fdb543d2e0554a45d6b756caf Mon Sep 17 00:00:00 2001
From: GuoCe <guoce@kylinos.cn>
Date: Sat, 7 Dec 2024 16:15:49 +0800
Subject: [PATCH] expected_algs list to include TLS_SM4
---
Lib/test/test_ssl.py | 1 +
1 file changed, 1 insertion(+)
diff --git a/Lib/test/test_ssl.py b/Lib/test/test_ssl.py
index 965c272..453624b 100644
--- a/Lib/test/test_ssl.py
+++ b/Lib/test/test_ssl.py
@@ -4324,6 +4324,7 @@ class ThreadedTests(unittest.TestCase):
"AES256", "AES-256",
# TLS 1.3 ciphers are always enabled
"TLS_CHACHA20", "TLS_AES",
+ "TLS_SM4",
]
stats = server_params_test(client_context, server_context,
--
2.33.0

View File

@ -0,0 +1,487 @@
From 49d48ccd462c97ecf63ce9ee4e8f1e94b2604a11 Mon Sep 17 00:00:00 2001
From: Victor Stinner <vstinner@python.org>
Date: Fri, 15 Dec 2023 16:10:40 +0100
Subject: [PATCH] [Backport] CVE-2023-27043 Reject malformed addresses in
email.parseaddr()
Reference: https://github.com/python/cpython/pull/111116
Detect email address parsing errors and return empty tuple to
indicate the parsing error (old API). Add an optional 'strict'
parameter to getaddresses() and parseaddr() functions.
Offering: CloudBu CMP
CVE: CVE-2023-27043
---
Doc/library/email.utils.rst | 19 +-
Lib/email/utils.py | 151 +++++++++++++-
Lib/test/test_email/test_email.py | 186 +++++++++++++++++-
...-10-20-15-28-08.gh-issue-102988.dStNO7.rst | 8 +
4 files changed, 343 insertions(+), 21 deletions(-)
create mode 100644 Misc/NEWS.d/next/Library/2023-10-20-15-28-08.gh-issue-102988.dStNO7.rst
diff --git a/Doc/library/email.utils.rst b/Doc/library/email.utils.rst
index 0e266b6..6723dc4 100644
--- a/Doc/library/email.utils.rst
+++ b/Doc/library/email.utils.rst
@@ -60,13 +60,18 @@ of the new API.
begins with angle brackets, they are stripped off.
-.. function:: parseaddr(address)
+.. function:: parseaddr(address, *, strict=True)
Parse address -- which should be the value of some address-containing field such
as :mailheader:`To` or :mailheader:`Cc` -- into its constituent *realname* and
*email address* parts. Returns a tuple of that information, unless the parse
fails, in which case a 2-tuple of ``('', '')`` is returned.
+ If *strict* is true, use a strict parser which rejects malformed inputs.
+
+ .. versionchanged:: 3.13
+ Add *strict* optional parameter and reject malformed inputs by default.
+
.. function:: formataddr(pair, charset='utf-8')
@@ -84,12 +89,15 @@ of the new API.
Added the *charset* option.
-.. function:: getaddresses(fieldvalues)
+.. function:: getaddresses(fieldvalues, *, strict=True)
This method returns a list of 2-tuples of the form returned by ``parseaddr()``.
*fieldvalues* is a sequence of header field values as might be returned by
- :meth:`Message.get_all <email.message.Message.get_all>`. Here's a simple
- example that gets all the recipients of a message::
+ :meth:`Message.get_all <email.message.Message.get_all>`.
+
+ If *strict* is true, use a strict parser which rejects malformed inputs.
+
+ Here's a simple example that gets all the recipients of a message::
from email.utils import getaddresses
@@ -99,6 +107,9 @@ of the new API.
resent_ccs = msg.get_all('resent-cc', [])
all_recipients = getaddresses(tos + ccs + resent_tos + resent_ccs)
+ .. versionchanged:: 3.13
+ Add *strict* optional parameter and reject malformed inputs by default.
+
.. function:: parsedate(date)
diff --git a/Lib/email/utils.py b/Lib/email/utils.py
index 95620b7..98e5ee1 100644
--- a/Lib/email/utils.py
+++ b/Lib/email/utils.py
@@ -48,6 +48,7 @@ TICK = "'"
specialsre = re.compile(r'[][\\()<>@,:;".]')
escapesre = re.compile(r'[\\"]')
+
def _has_surrogates(s):
"""Return True if s contains surrogate-escaped binary data."""
# This check is based on the fact that unless there are surrogates, utf8
@@ -105,11 +106,126 @@ def formataddr(pair, charset='utf-8'):
return '%s%s%s <%s>' % (quotes, name, quotes, address)
return address
-def getaddresses(fieldvalues):
- """Return a list of (REALNAME, EMAIL) for each fieldvalue."""
- all = COMMASPACE.join(str(v) for v in fieldvalues)
- a = _AddressList(all)
- return a.addresslist
+def _iter_escaped_chars(addr):
+ pos = 0
+ escape = False
+ for pos, ch in enumerate(addr):
+ if escape:
+ yield (pos, '\\' + ch)
+ escape = False
+ elif ch == '\\':
+ escape = True
+ else:
+ yield (pos, ch)
+ if escape:
+ yield (pos, '\\')
+
+
+def _strip_quoted_realnames(addr):
+ """Strip real names between quotes."""
+ if '"' not in addr:
+ # Fast path
+ return addr
+
+ start = 0
+ open_pos = None
+ result = []
+ for pos, ch in _iter_escaped_chars(addr):
+ if ch == '"':
+ if open_pos is None:
+ open_pos = pos
+ else:
+ if start != open_pos:
+ result.append(addr[start:open_pos])
+ start = pos + 1
+ open_pos = None
+
+ if start < len(addr):
+ result.append(addr[start:])
+
+ return ''.join(result)
+
+supports_strict_parsing = True
+
+def getaddresses(fieldvalues, *, strict=True):
+ """Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue.
+
+ When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in
+ its place.
+
+ If strict is true, use a strict parser which rejects malformed inputs.
+ """
+
+ # If strict is true, if the resulting list of parsed addresses is greater
+ # than the number of fieldvalues in the input list, a parsing error has
+ # occurred and consequently a list containing a single empty 2-tuple [('',
+ # '')] is returned in its place. This is done to avoid invalid output.
+ #
+ # Malformed input: getaddresses(['alice@example.com <bob@example.com>'])
+ # Invalid output: [('', 'alice@example.com'), ('', 'bob@example.com')]
+ # Safe output: [('', '')]
+
+ if not strict:
+ all = COMMASPACE.join(str(v) for v in fieldvalues)
+ a = _AddressList(all)
+ return a.addresslist
+
+ fieldvalues = [str(v) for v in fieldvalues]
+ fieldvalues = _pre_parse_validation(fieldvalues)
+ addr = COMMASPACE.join(fieldvalues)
+ a = _AddressList(addr)
+ result = _post_parse_validation(a.addresslist)
+
+ # Treat output as invalid if the number of addresses is not equal to the
+ # expected number of addresses.
+ n = 0
+ for v in fieldvalues:
+ # When a comma is used in the Real Name part it is not a delimiter.
+ # So strip those out before counting the commas.
+ v = _strip_quoted_realnames(v)
+ # Expected number of addresses: 1 + number of commas
+ n += 1 + v.count(',')
+ if len(result) != n:
+ return [('', '')]
+
+ return result
+
+
+def _check_parenthesis(addr):
+ # Ignore parenthesis in quoted real names.
+ addr = _strip_quoted_realnames(addr)
+
+ opens = 0
+ for pos, ch in _iter_escaped_chars(addr):
+ if ch == '(':
+ opens += 1
+ elif ch == ')':
+ opens -= 1
+ if opens < 0:
+ return False
+ return (opens == 0)
+
+
+def _pre_parse_validation(email_header_fields):
+ accepted_values = []
+ for v in email_header_fields:
+ if not _check_parenthesis(v):
+ v = "('', '')"
+ accepted_values.append(v)
+
+ return accepted_values
+
+
+def _post_parse_validation(parsed_email_header_tuples):
+ accepted_values = []
+ # The parser would have parsed a correctly formatted domain-literal
+ # The existence of an [ after parsing indicates a parsing failure
+ for v in parsed_email_header_tuples:
+ if '[' in v[1]:
+ v = ('', '')
+ accepted_values.append(v)
+
+ return accepted_values
def _format_timetuple_and_zone(timetuple, zone):
@@ -203,16 +319,33 @@ def parsedate_to_datetime(data):
tzinfo=datetime.timezone(datetime.timedelta(seconds=tz)))
-def parseaddr(addr):
+def parseaddr(addr, *, strict=True):
"""
Parse addr into its constituent realname and email address parts.
Return a tuple of realname and email address, unless the parse fails, in
which case return a 2-tuple of ('', '').
+
+ If strict is True, use a strict parser which rejects malformed inputs.
"""
- addrs = _AddressList(addr).addresslist
- if not addrs:
- return '', ''
+ if not strict:
+ addrs = _AddressList(addr).addresslist
+ if not addrs:
+ return ('', '')
+ return addrs[0]
+
+ if isinstance(addr, list):
+ addr = addr[0]
+
+ if not isinstance(addr, str):
+ return ('', '')
+
+ addr = _pre_parse_validation([addr])[0]
+ addrs = _post_parse_validation(_AddressList(addr).addresslist)
+
+ if not addrs or len(addrs) > 1:
+ return ('', '')
+
return addrs[0]
diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py
index 473a488..c4b0a4e 100644
--- a/Lib/test/test_email/test_email.py
+++ b/Lib/test/test_email/test_email.py
@@ -17,6 +17,7 @@ from unittest.mock import patch
import email
import email.policy
+import email.utils
from email.charset import Charset
from email.generator import Generator, DecodedGenerator, BytesGenerator
@@ -3338,15 +3339,137 @@ Foo
],
)
+ def test_parsing_errors(self):
+ """Test for parsing errors from CVE-2023-27043 and CVE-2019-16056"""
+ alice = 'alice@example.org'
+ bob = 'bob@example.com'
+ empty = ('', '')
+
+ # Test utils.getaddresses() and utils.parseaddr() on malformed email
+ # addresses: default behavior (strict=True) rejects malformed address,
+ # and strict=False which tolerates malformed address.
+ for invalid_separator, expected_non_strict in (
+ ('(', [(f'<{bob}>', alice)]),
+ (')', [('', alice), empty, ('', bob)]),
+ ('<', [('', alice), empty, ('', bob), empty]),
+ ('>', [('', alice), empty, ('', bob)]),
+ ('[', [('', f'{alice}[<{bob}>]')]),
+ (']', [('', alice), empty, ('', bob)]),
+ ('@', [empty, empty, ('', bob)]),
+ (';', [('', alice), empty, ('', bob)]),
+ (':', [('', alice), ('', bob)]),
+ ('.', [('', alice + '.'), ('', bob)]),
+ ('"', [('', alice), ('', f'<{bob}>')]),
+ ):
+ address = f'{alice}{invalid_separator}<{bob}>'
+ with self.subTest(address=address):
+ self.assertEqual(utils.getaddresses([address]),
+ [empty])
+ self.assertEqual(utils.getaddresses([address], strict=False),
+ expected_non_strict)
+
+ self.assertEqual(utils.parseaddr([address]),
+ empty)
+ self.assertEqual(utils.parseaddr([address], strict=False),
+ ('', address))
+
+ # Comma (',') is treated differently depending on strict parameter.
+ # Comma without quotes.
+ address = f'{alice},<{bob}>'
+ self.assertEqual(utils.getaddresses([address]),
+ [('', alice), ('', bob)])
+ self.assertEqual(utils.getaddresses([address], strict=False),
+ [('', alice), ('', bob)])
+ self.assertEqual(utils.parseaddr([address]),
+ empty)
+ self.assertEqual(utils.parseaddr([address], strict=False),
+ ('', address))
+
+ # Real name between quotes containing comma.
+ address = '"Alice, alice@example.org" <bob@example.com>'
+ expected_strict = ('Alice, alice@example.org', 'bob@example.com')
+ self.assertEqual(utils.getaddresses([address]), [expected_strict])
+ self.assertEqual(utils.getaddresses([address], strict=False), [expected_strict])
+ self.assertEqual(utils.parseaddr([address]), expected_strict)
+ self.assertEqual(utils.parseaddr([address], strict=False),
+ ('', address))
+
+ # Valid parenthesis in comments.
+ address = 'alice@example.org (Alice)'
+ expected_strict = ('Alice', 'alice@example.org')
+ self.assertEqual(utils.getaddresses([address]), [expected_strict])
+ self.assertEqual(utils.getaddresses([address], strict=False), [expected_strict])
+ self.assertEqual(utils.parseaddr([address]), expected_strict)
+ self.assertEqual(utils.parseaddr([address], strict=False),
+ ('', address))
+
+ # Invalid parenthesis in comments.
+ address = 'alice@example.org )Alice('
+ self.assertEqual(utils.getaddresses([address]), [empty])
+ self.assertEqual(utils.getaddresses([address], strict=False),
+ [('', 'alice@example.org'), ('', ''), ('', 'Alice')])
+ self.assertEqual(utils.parseaddr([address]), empty)
+ self.assertEqual(utils.parseaddr([address], strict=False),
+ ('', address))
+
+ # Two addresses with quotes separated by comma.
+ address = '"Jane Doe" <jane@example.net>, "John Doe" <john@example.net>'
+ self.assertEqual(utils.getaddresses([address]),
+ [('Jane Doe', 'jane@example.net'),
+ ('John Doe', 'john@example.net')])
+ self.assertEqual(utils.getaddresses([address], strict=False),
+ [('Jane Doe', 'jane@example.net'),
+ ('John Doe', 'john@example.net')])
+ self.assertEqual(utils.parseaddr([address]), empty)
+ self.assertEqual(utils.parseaddr([address], strict=False),
+ ('', address))
+
+ # Test email.utils.supports_strict_parsing attribute
+ self.assertEqual(email.utils.supports_strict_parsing, True)
+
def test_getaddresses_nasty(self):
- eq = self.assertEqual
- eq(utils.getaddresses(['foo: ;']), [('', '')])
- eq(utils.getaddresses(
- ['[]*-- =~$']),
- [('', ''), ('', ''), ('', '*--')])
- eq(utils.getaddresses(
- ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
- [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
+ for addresses, expected in (
+ (['"Sürname, Firstname" <to@example.com>'],
+ [('Sürname, Firstname', 'to@example.com')]),
+
+ (['foo: ;'],
+ [('', '')]),
+
+ (['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>'],
+ [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')]),
+
+ ([r'Pete(A nice \) chap) <pete(his account)@silly.test(his host)>'],
+ [('Pete (A nice ) chap his account his host)', 'pete@silly.test')]),
+
+ (['(Empty list)(start)Undisclosed recipients :(nobody(I know))'],
+ [('', '')]),
+
+ (['Mary <@machine.tld:mary@example.net>, , jdoe@test . example'],
+ [('Mary', 'mary@example.net'), ('', ''), ('', 'jdoe@test.example')]),
+
+ (['John Doe <jdoe@machine(comment). example>'],
+ [('John Doe (comment)', 'jdoe@machine.example')]),
+
+ (['"Mary Smith: Personal Account" <smith@home.example>'],
+ [('Mary Smith: Personal Account', 'smith@home.example')]),
+
+ (['Undisclosed recipients:;'],
+ [('', '')]),
+
+ ([r'<boss@nil.test>, "Giant; \"Big\" Box" <bob@example.net>'],
+ [('', 'boss@nil.test'), ('Giant; "Big" Box', 'bob@example.net')]),
+ ):
+ with self.subTest(addresses=addresses):
+ self.assertEqual(utils.getaddresses(addresses),
+ expected)
+ self.assertEqual(utils.getaddresses(addresses, strict=False),
+ expected)
+
+ addresses = ['[]*-- =~$']
+ self.assertEqual(utils.getaddresses(addresses),
+ [('', '')])
+ self.assertEqual(utils.getaddresses(addresses, strict=False),
+ [('', ''), ('', ''), ('', '*--')])
def test_getaddresses_embedded_comment(self):
"""Test proper handling of a nested comment"""
@@ -3537,6 +3660,53 @@ multipart/report
m = cls(*constructor, policy=email.policy.default)
self.assertIs(m.policy, email.policy.default)
+ def test_iter_escaped_chars(self):
+ self.assertEqual(list(utils._iter_escaped_chars(r'a\\b\"c\\"d')),
+ [(0, 'a'),
+ (2, '\\\\'),
+ (3, 'b'),
+ (5, '\\"'),
+ (6, 'c'),
+ (8, '\\\\'),
+ (9, '"'),
+ (10, 'd')])
+ self.assertEqual(list(utils._iter_escaped_chars('a\\')),
+ [(0, 'a'), (1, '\\')])
+
+ def test_strip_quoted_realnames(self):
+ def check(addr, expected):
+ self.assertEqual(utils._strip_quoted_realnames(addr), expected)
+
+ check('"Jane Doe" <jane@example.net>, "John Doe" <john@example.net>',
+ ' <jane@example.net>, <john@example.net>')
+ check(r'"Jane \"Doe\"." <jane@example.net>',
+ ' <jane@example.net>')
+
+ # special cases
+ check(r'before"name"after', 'beforeafter')
+ check(r'before"name"', 'before')
+ check(r'b"name"', 'b') # single char
+ check(r'"name"after', 'after')
+ check(r'"name"a', 'a') # single char
+ check(r'"name"', '')
+
+ # no change
+ for addr in (
+ 'Jane Doe <jane@example.net>, John Doe <john@example.net>',
+ 'lone " quote',
+ ):
+ self.assertEqual(utils._strip_quoted_realnames(addr), addr)
+
+
+ def test_check_parenthesis(self):
+ addr = 'alice@example.net'
+ self.assertTrue(utils._check_parenthesis(f'{addr} (Alice)'))
+ self.assertFalse(utils._check_parenthesis(f'{addr} )Alice('))
+ self.assertFalse(utils._check_parenthesis(f'{addr} (Alice))'))
+ self.assertFalse(utils._check_parenthesis(f'{addr} ((Alice)'))
+
+ # Ignore real name between quotes
+ self.assertTrue(utils._check_parenthesis(f'")Alice((" {addr}'))
# Test the iterator/generators
class TestIterators(TestEmailBase):
diff --git a/Misc/NEWS.d/next/Library/2023-10-20-15-28-08.gh-issue-102988.dStNO7.rst b/Misc/NEWS.d/next/Library/2023-10-20-15-28-08.gh-issue-102988.dStNO7.rst
new file mode 100644
index 0000000..3d0e9e4
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2023-10-20-15-28-08.gh-issue-102988.dStNO7.rst
@@ -0,0 +1,8 @@
+:func:`email.utils.getaddresses` and :func:`email.utils.parseaddr` now
+return ``('', '')`` 2-tuples in more situations where invalid email
+addresses are encountered instead of potentially inaccurate values. Add
+optional *strict* parameter to these two functions: use ``strict=False`` to
+get the old behavior, accept malformed inputs.
+``getattr(email.utils, 'supports_strict_parsing', False)`` can be used to check
+if the *strict* parameter is available. Patch by Thomas Dwyer and Victor
+Stinner to improve the CVE-2023-27043 fix.
--
2.27.0

View File

@ -0,0 +1,218 @@
From 5585334d772b253a01a6730e8202ffb1607c3d25 Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Thu, 7 Dec 2023 18:37:10 +0200
Subject: [PATCH] [3.11] gh-91133: tempfile.TemporaryDirectory: fix symlink bug
in cleanup (GH-99930) (GH-112839)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
(cherry picked from commit 81c16cd94ec38d61aa478b9a452436dc3b1b524d)
Co-authored-by: Søren Løvborg <sorenl@unity3d.com>
---
Lib/tempfile.py | 27 +++--
Lib/test/test_tempfile.py | 111 +++++++++++++++++-
...2-12-01-16-57-44.gh-issue-91133.LKMVCV.rst | 2 +
3 files changed, 125 insertions(+), 15 deletions(-)
create mode 100644 Misc/NEWS.d/next/Library/2022-12-01-16-57-44.gh-issue-91133.LKMVCV.rst
diff --git a/Lib/tempfile.py b/Lib/tempfile.py
index aace11fa7b1..f59a63a7b45 100644
--- a/Lib/tempfile.py
+++ b/Lib/tempfile.py
@@ -270,6 +270,22 @@ def _mkstemp_inner(dir, pre, suf, flags, output_type):
raise FileExistsError(_errno.EEXIST,
"No usable temporary file name found")
+def _dont_follow_symlinks(func, path, *args):
+ # Pass follow_symlinks=False, unless not supported on this platform.
+ if func in _os.supports_follow_symlinks:
+ func(path, *args, follow_symlinks=False)
+ elif _os.name == 'nt' or not _os.path.islink(path):
+ func(path, *args)
+
+def _resetperms(path):
+ try:
+ chflags = _os.chflags
+ except AttributeError:
+ pass
+ else:
+ _dont_follow_symlinks(chflags, path, 0)
+ _dont_follow_symlinks(_os.chmod, path, 0o700)
+
# User visible interfaces.
@@ -863,17 +879,10 @@ def __init__(self, suffix=None, prefix=None, dir=None,
def _rmtree(cls, name, ignore_errors=False):
def onerror(func, path, exc_info):
if issubclass(exc_info[0], PermissionError):
- def resetperms(path):
- try:
- _os.chflags(path, 0)
- except AttributeError:
- pass
- _os.chmod(path, 0o700)
-
try:
if path != name:
- resetperms(_os.path.dirname(path))
- resetperms(path)
+ _resetperms(_os.path.dirname(path))
+ _resetperms(path)
try:
_os.unlink(path)
diff --git a/Lib/test/test_tempfile.py b/Lib/test/test_tempfile.py
index 1242ec7e3cc..675edc8de9c 100644
--- a/Lib/test/test_tempfile.py
+++ b/Lib/test/test_tempfile.py
@@ -1565,6 +1565,103 @@ def test_cleanup_with_symlink_to_a_directory(self):
"were deleted")
d2.cleanup()
+ @os_helper.skip_unless_symlink
+ def test_cleanup_with_symlink_modes(self):
+ # cleanup() should not follow symlinks when fixing mode bits (#91133)
+ with self.do_create(recurse=0) as d2:
+ file1 = os.path.join(d2, 'file1')
+ open(file1, 'wb').close()
+ dir1 = os.path.join(d2, 'dir1')
+ os.mkdir(dir1)
+ for mode in range(8):
+ mode <<= 6
+ with self.subTest(mode=format(mode, '03o')):
+ def test(target, target_is_directory):
+ d1 = self.do_create(recurse=0)
+ symlink = os.path.join(d1.name, 'symlink')
+ os.symlink(target, symlink,
+ target_is_directory=target_is_directory)
+ try:
+ os.chmod(symlink, mode, follow_symlinks=False)
+ except NotImplementedError:
+ pass
+ try:
+ os.chmod(symlink, mode)
+ except FileNotFoundError:
+ pass
+ os.chmod(d1.name, mode)
+ d1.cleanup()
+ self.assertFalse(os.path.exists(d1.name))
+
+ with self.subTest('nonexisting file'):
+ test('nonexisting', target_is_directory=False)
+ with self.subTest('nonexisting dir'):
+ test('nonexisting', target_is_directory=True)
+
+ with self.subTest('existing file'):
+ os.chmod(file1, mode)
+ old_mode = os.stat(file1).st_mode
+ test(file1, target_is_directory=False)
+ new_mode = os.stat(file1).st_mode
+ self.assertEqual(new_mode, old_mode,
+ '%03o != %03o' % (new_mode, old_mode))
+
+ with self.subTest('existing dir'):
+ os.chmod(dir1, mode)
+ old_mode = os.stat(dir1).st_mode
+ test(dir1, target_is_directory=True)
+ new_mode = os.stat(dir1).st_mode
+ self.assertEqual(new_mode, old_mode,
+ '%03o != %03o' % (new_mode, old_mode))
+
+ @unittest.skipUnless(hasattr(os, 'chflags'), 'requires os.chflags')
+ @os_helper.skip_unless_symlink
+ def test_cleanup_with_symlink_flags(self):
+ # cleanup() should not follow symlinks when fixing flags (#91133)
+ flags = stat.UF_IMMUTABLE | stat.UF_NOUNLINK
+ self.check_flags(flags)
+
+ with self.do_create(recurse=0) as d2:
+ file1 = os.path.join(d2, 'file1')
+ open(file1, 'wb').close()
+ dir1 = os.path.join(d2, 'dir1')
+ os.mkdir(dir1)
+ def test(target, target_is_directory):
+ d1 = self.do_create(recurse=0)
+ symlink = os.path.join(d1.name, 'symlink')
+ os.symlink(target, symlink,
+ target_is_directory=target_is_directory)
+ try:
+ os.chflags(symlink, flags, follow_symlinks=False)
+ except NotImplementedError:
+ pass
+ try:
+ os.chflags(symlink, flags)
+ except FileNotFoundError:
+ pass
+ os.chflags(d1.name, flags)
+ d1.cleanup()
+ self.assertFalse(os.path.exists(d1.name))
+
+ with self.subTest('nonexisting file'):
+ test('nonexisting', target_is_directory=False)
+ with self.subTest('nonexisting dir'):
+ test('nonexisting', target_is_directory=True)
+
+ with self.subTest('existing file'):
+ os.chflags(file1, flags)
+ old_flags = os.stat(file1).st_flags
+ test(file1, target_is_directory=False)
+ new_flags = os.stat(file1).st_flags
+ self.assertEqual(new_flags, old_flags)
+
+ with self.subTest('existing dir'):
+ os.chflags(dir1, flags)
+ old_flags = os.stat(dir1).st_flags
+ test(dir1, target_is_directory=True)
+ new_flags = os.stat(dir1).st_flags
+ self.assertEqual(new_flags, old_flags)
+
@support.cpython_only
def test_del_on_collection(self):
# A TemporaryDirectory is deleted when garbage collected
@@ -1737,10 +1834,7 @@ def test_modes(self):
d.cleanup()
self.assertFalse(os.path.exists(d.name))
- @unittest.skipUnless(hasattr(os, 'chflags'), 'requires os.chflags')
- def test_flags(self):
- flags = stat.UF_IMMUTABLE | stat.UF_NOUNLINK
-
+ def check_flags(self, flags):
# skip the test if these flags are not supported (ex: FreeBSD 13)
filename = os_helper.TESTFN
try:
@@ -1749,13 +1843,18 @@ def test_flags(self):
os.chflags(filename, flags)
except OSError as exc:
# "OSError: [Errno 45] Operation not supported"
- self.skipTest(f"chflags() doesn't support "
- f"UF_IMMUTABLE|UF_NOUNLINK: {exc}")
+ self.skipTest(f"chflags() doesn't support flags "
+ f"{flags:#b}: {exc}")
else:
os.chflags(filename, 0)
finally:
os_helper.unlink(filename)
+ @unittest.skipUnless(hasattr(os, 'chflags'), 'requires os.chflags')
+ def test_flags(self):
+ flags = stat.UF_IMMUTABLE | stat.UF_NOUNLINK
+ self.check_flags(flags)
+
d = self.do_create(recurse=3, dirs=2, files=2)
with d:
# Change files and directories flags recursively.
diff --git a/Misc/NEWS.d/next/Library/2022-12-01-16-57-44.gh-issue-91133.LKMVCV.rst b/Misc/NEWS.d/next/Library/2022-12-01-16-57-44.gh-issue-91133.LKMVCV.rst
new file mode 100644
index 00000000000..7991048fc48
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2022-12-01-16-57-44.gh-issue-91133.LKMVCV.rst
@@ -0,0 +1,2 @@
+Fix a bug in :class:`tempfile.TemporaryDirectory` cleanup, which now no longer
+dereferences symlinks when working around file system permission errors.
--
2.33.0

View File

@ -0,0 +1,146 @@
From a956e510f6336d5ae111ba429a61c3ade30a7549 Mon Sep 17 00:00:00 2001
From: "Miss Islington (bot)"
<31488909+miss-islington@users.noreply.github.com>
Date: Thu, 11 Jan 2024 10:24:47 +0100
Subject: [PATCH] [3.11] gh-109858: Protect zipfile from "quoted-overlap"
zipbomb (GH-110016) (GH-113913)
Raise BadZipFile when trying to read an entry that overlaps with another entry or
the central directory.
(cherry picked from commit 66363b9a7b9fe7c99eba3a185b74c5fdbf842eba)
Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
---
Lib/test/test_zipfile.py | 60 +++++++++++++++++++
Lib/zipfile.py | 12 ++++
...-09-28-13-15-51.gh-issue-109858.43e2dg.rst | 3 +
3 files changed, 75 insertions(+)
create mode 100644 Misc/NEWS.d/next/Library/2023-09-28-13-15-51.gh-issue-109858.43e2dg.rst
diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py
index c8e0159765e..9354ab74faa 100644
--- a/Lib/test/test_zipfile.py
+++ b/Lib/test/test_zipfile.py
@@ -2216,6 +2216,66 @@ def test_decompress_without_3rd_party_library(self):
with zipfile.ZipFile(zip_file) as zf:
self.assertRaises(RuntimeError, zf.extract, 'a.txt')
+ @requires_zlib()
+ def test_full_overlap(self):
+ data = (
+ b'PK\x03\x04\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2\x1e'
+ b'8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00a\xed'
+ b'\xc0\x81\x08\x00\x00\x00\xc00\xd6\xfbK\\d\x0b`P'
+ b'K\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2'
+ b'\x1e8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00\x00'
+ b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00aPK'
+ b'\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2\x1e'
+ b'8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00\x00\x00'
+ b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00bPK\x05'
+ b'\x06\x00\x00\x00\x00\x02\x00\x02\x00^\x00\x00\x00/\x00\x00'
+ b'\x00\x00\x00'
+ )
+ with zipfile.ZipFile(io.BytesIO(data), 'r') as zipf:
+ self.assertEqual(zipf.namelist(), ['a', 'b'])
+ zi = zipf.getinfo('a')
+ self.assertEqual(zi.header_offset, 0)
+ self.assertEqual(zi.compress_size, 16)
+ self.assertEqual(zi.file_size, 1033)
+ zi = zipf.getinfo('b')
+ self.assertEqual(zi.header_offset, 0)
+ self.assertEqual(zi.compress_size, 16)
+ self.assertEqual(zi.file_size, 1033)
+ self.assertEqual(len(zipf.read('a')), 1033)
+ with self.assertRaisesRegex(zipfile.BadZipFile, 'File name.*differ'):
+ zipf.read('b')
+
+ @requires_zlib()
+ def test_quoted_overlap(self):
+ data = (
+ b'PK\x03\x04\x14\x00\x00\x00\x08\x00\xa0lH\x05Y\xfc'
+ b'8\x044\x00\x00\x00(\x04\x00\x00\x01\x00\x00\x00a\x00'
+ b'\x1f\x00\xe0\xffPK\x03\x04\x14\x00\x00\x00\x08\x00\xa0l'
+ b'H\x05\xe2\x1e8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00'
+ b'\x00\x00b\xed\xc0\x81\x08\x00\x00\x00\xc00\xd6\xfbK\\'
+ b'd\x0b`PK\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0'
+ b'lH\x05Y\xfc8\x044\x00\x00\x00(\x04\x00\x00\x01'
+ b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
+ b'\x00aPK\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0l'
+ b'H\x05\xe2\x1e8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00'
+ b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00$\x00\x00\x00'
+ b'bPK\x05\x06\x00\x00\x00\x00\x02\x00\x02\x00^\x00\x00'
+ b'\x00S\x00\x00\x00\x00\x00'
+ )
+ with zipfile.ZipFile(io.BytesIO(data), 'r') as zipf:
+ self.assertEqual(zipf.namelist(), ['a', 'b'])
+ zi = zipf.getinfo('a')
+ self.assertEqual(zi.header_offset, 0)
+ self.assertEqual(zi.compress_size, 52)
+ self.assertEqual(zi.file_size, 1064)
+ zi = zipf.getinfo('b')
+ self.assertEqual(zi.header_offset, 36)
+ self.assertEqual(zi.compress_size, 16)
+ self.assertEqual(zi.file_size, 1033)
+ with self.assertRaisesRegex(zipfile.BadZipFile, 'Overlapped entries'):
+ zipf.read('a')
+ self.assertEqual(len(zipf.read('b')), 1033)
+
def tearDown(self):
unlink(TESTFN)
unlink(TESTFN2)
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
index 6189db5e3e4..058d7163ea1 100644
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -367,6 +367,7 @@ class ZipInfo (object):
'compress_size',
'file_size',
'_raw_time',
+ '_end_offset',
)
def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
@@ -408,6 +409,7 @@ def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
self.external_attr = 0 # External file attributes
self.compress_size = 0 # Size of the compressed file
self.file_size = 0 # Size of the uncompressed file
+ self._end_offset = None # Start of the next local header or central directory
# Other attributes are set by class ZipFile:
# header_offset Byte offset to the file header
# CRC CRC-32 of the uncompressed file
@@ -1437,6 +1439,12 @@ def _RealGetContents(self):
if self.debug > 2:
print("total", total)
+ end_offset = self.start_dir
+ for zinfo in sorted(self.filelist,
+ key=lambda zinfo: zinfo.header_offset,
+ reverse=True):
+ zinfo._end_offset = end_offset
+ end_offset = zinfo.header_offset
def namelist(self):
"""Return a list of file names in the archive."""
@@ -1590,6 +1598,10 @@ def open(self, name, mode="r", pwd=None, *, force_zip64=False):
'File name in directory %r and header %r differ.'
% (zinfo.orig_filename, fname))
+ if (zinfo._end_offset is not None and
+ zef_file.tell() + zinfo.compress_size > zinfo._end_offset):
+ raise BadZipFile(f"Overlapped entries: {zinfo.orig_filename!r} (possible zip bomb)")
+
# check for encrypted flag & handle password
is_encrypted = zinfo.flag_bits & _MASK_ENCRYPTED
if is_encrypted:
diff --git a/Misc/NEWS.d/next/Library/2023-09-28-13-15-51.gh-issue-109858.43e2dg.rst b/Misc/NEWS.d/next/Library/2023-09-28-13-15-51.gh-issue-109858.43e2dg.rst
new file mode 100644
index 00000000000..be279caffc4
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2023-09-28-13-15-51.gh-issue-109858.43e2dg.rst
@@ -0,0 +1,3 @@
+Protect :mod:`zipfile` from "quoted-overlap" zipbomb. It now raises
+BadZipFile when trying to read an entry that overlaps with another entry or
+the central directory.
--
2.33.0

View File

@ -0,0 +1,218 @@
From 5f90abaa786f994db3907fc31e2ee00ea2cf0929 Mon Sep 17 00:00:00 2001
From: "Miss Islington (bot)"
<31488909+miss-islington@users.noreply.github.com>
Date: Tue, 30 Jul 2024 14:43:45 +0200
Subject: [PATCH] [3.11] gh-122133: Authenticate socket connection for
`socket.socketpair()` fallback (GH-122134) (#122426)
Authenticate socket connection for `socket.socketpair()` fallback when the platform does not have a native `socketpair` C API. We authenticate in-process using `getsockname` and `getpeername` (thanks to Nathaniel J Smith for that suggestion).
(cherry picked from commit 78df1043dbdce5c989600616f9f87b4ee72944e5)
Co-authored-by: Seth Michael Larson <seth@python.org>
Co-authored-by: Gregory P. Smith <greg@krypto.org>
---
Lib/socket.py | 17 +++
Lib/test/test_socket.py | 128 +++++++++++++++++-
...-07-22-13-11-28.gh-issue-122133.0mPeta.rst | 5 +
3 files changed, 147 insertions(+), 3 deletions(-)
create mode 100644 Misc/NEWS.d/next/Security/2024-07-22-13-11-28.gh-issue-122133.0mPeta.rst
diff --git a/Lib/socket.py b/Lib/socket.py
index a0567b76bcf..591d4739a64 100644
--- a/Lib/socket.py
+++ b/Lib/socket.py
@@ -648,6 +648,23 @@ def socketpair(family=AF_INET, type=SOCK_STREAM, proto=0):
raise
finally:
lsock.close()
+
+ # Authenticating avoids using a connection from something else
+ # able to connect to {host}:{port} instead of us.
+ # We expect only AF_INET and AF_INET6 families.
+ try:
+ if (
+ ssock.getsockname() != csock.getpeername()
+ or csock.getsockname() != ssock.getpeername()
+ ):
+ raise ConnectionError("Unexpected peer connection")
+ except:
+ # getsockname() and getpeername() can fail
+ # if either socket isn't connected.
+ ssock.close()
+ csock.close()
+ raise
+
return (ssock, csock)
__all__.append("socketpair")
diff --git a/Lib/test/test_socket.py b/Lib/test/test_socket.py
index 42adc573ecc..a60eb436c7b 100644
--- a/Lib/test/test_socket.py
+++ b/Lib/test/test_socket.py
@@ -542,19 +542,27 @@ class SocketPairTest(unittest.TestCase, ThreadableTest):
def __init__(self, methodName='runTest'):
unittest.TestCase.__init__(self, methodName=methodName)
ThreadableTest.__init__(self)
+ self.cli = None
+ self.serv = None
+
+ def socketpair(self):
+ # To be overridden by some child classes.
+ return socket.socketpair()
def setUp(self):
- self.serv, self.cli = socket.socketpair()
+ self.serv, self.cli = self.socketpair()
def tearDown(self):
- self.serv.close()
+ if self.serv:
+ self.serv.close()
self.serv = None
def clientSetUp(self):
pass
def clientTearDown(self):
- self.cli.close()
+ if self.cli:
+ self.cli.close()
self.cli = None
ThreadableTest.clientTearDown(self)
@@ -4667,6 +4675,120 @@ def _testSend(self):
self.assertEqual(msg, MSG)
+class PurePythonSocketPairTest(SocketPairTest):
+
+ # Explicitly use socketpair AF_INET or AF_INET6 to ensure that is the
+ # code path we're using regardless platform is the pure python one where
+ # `_socket.socketpair` does not exist. (AF_INET does not work with
+ # _socket.socketpair on many platforms).
+ def socketpair(self):
+ # called by super().setUp().
+ try:
+ return socket.socketpair(socket.AF_INET6)
+ except OSError:
+ return socket.socketpair(socket.AF_INET)
+
+ # Local imports in this class make for easy security fix backporting.
+
+ def setUp(self):
+ import _socket
+ self._orig_sp = getattr(_socket, 'socketpair', None)
+ if self._orig_sp is not None:
+ # This forces the version using the non-OS provided socketpair
+ # emulation via an AF_INET socket in Lib/socket.py.
+ del _socket.socketpair
+ import importlib
+ global socket
+ socket = importlib.reload(socket)
+ else:
+ pass # This platform already uses the non-OS provided version.
+ super().setUp()
+
+ def tearDown(self):
+ super().tearDown()
+ import _socket
+ if self._orig_sp is not None:
+ # Restore the default socket.socketpair definition.
+ _socket.socketpair = self._orig_sp
+ import importlib
+ global socket
+ socket = importlib.reload(socket)
+
+ def test_recv(self):
+ msg = self.serv.recv(1024)
+ self.assertEqual(msg, MSG)
+
+ def _test_recv(self):
+ self.cli.send(MSG)
+
+ def test_send(self):
+ self.serv.send(MSG)
+
+ def _test_send(self):
+ msg = self.cli.recv(1024)
+ self.assertEqual(msg, MSG)
+
+ def test_ipv4(self):
+ cli, srv = socket.socketpair(socket.AF_INET)
+ cli.close()
+ srv.close()
+
+ def _test_ipv4(self):
+ pass
+
+ @unittest.skipIf(not hasattr(_socket, 'IPPROTO_IPV6') or
+ not hasattr(_socket, 'IPV6_V6ONLY'),
+ "IPV6_V6ONLY option not supported")
+ @unittest.skipUnless(socket_helper.IPV6_ENABLED, 'IPv6 required for this test')
+ def test_ipv6(self):
+ cli, srv = socket.socketpair(socket.AF_INET6)
+ cli.close()
+ srv.close()
+
+ def _test_ipv6(self):
+ pass
+
+ def test_injected_authentication_failure(self):
+ orig_getsockname = socket.socket.getsockname
+ inject_sock = None
+
+ def inject_getsocketname(self):
+ nonlocal inject_sock
+ sockname = orig_getsockname(self)
+ # Connect to the listening socket ahead of the
+ # client socket.
+ if inject_sock is None:
+ inject_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+ inject_sock.setblocking(False)
+ try:
+ inject_sock.connect(sockname[:2])
+ except (BlockingIOError, InterruptedError):
+ pass
+ inject_sock.setblocking(True)
+ return sockname
+
+ sock1 = sock2 = None
+ try:
+ socket.socket.getsockname = inject_getsocketname
+ with self.assertRaises(OSError):
+ sock1, sock2 = socket.socketpair()
+ finally:
+ socket.socket.getsockname = orig_getsockname
+ if inject_sock:
+ inject_sock.close()
+ if sock1: # This cleanup isn't needed on a successful test.
+ sock1.close()
+ if sock2:
+ sock2.close()
+
+ def _test_injected_authentication_failure(self):
+ # No-op. Exists for base class threading infrastructure to call.
+ # We could refactor this test into its own lesser class along with the
+ # setUp and tearDown code to construct an ideal; it is simpler to keep
+ # it here and live with extra overhead one this _one_ failure test.
+ pass
+
+
class NonBlockingTCPTests(ThreadedTCPSocketTest):
def __init__(self, methodName='runTest'):
diff --git a/Misc/NEWS.d/next/Security/2024-07-22-13-11-28.gh-issue-122133.0mPeta.rst b/Misc/NEWS.d/next/Security/2024-07-22-13-11-28.gh-issue-122133.0mPeta.rst
new file mode 100644
index 00000000000..3544eb3824d
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2024-07-22-13-11-28.gh-issue-122133.0mPeta.rst
@@ -0,0 +1,5 @@
+Authenticate the socket connection for the ``socket.socketpair()`` fallback
+on platforms where ``AF_UNIX`` is not available like Windows.
+
+Patch by Gregory P. Smith <greg@krypto.org> and Seth Larson <seth@python.org>. Reported by Ellie
+<el@horse64.org>
--
2.33.0

View File

@ -0,0 +1,207 @@
From c5655aa6ad120d2ed7f255bebd6e8b71a9c07dde Mon Sep 17 00:00:00 2001
From: "Miss Islington (bot)"
<31488909+miss-islington@users.noreply.github.com>
Date: Fri, 2 Aug 2024 15:09:45 +0200
Subject: [PATCH] [3.11] gh-122133: Rework pure Python socketpair tests to
avoid use of importlib.reload. (GH-122493) (GH-122506)
(cherry picked from commit f071f01b7b7e19d7d6b3a4b0ec62f820ecb14660)
Co-authored-by: Russell Keith-Magee <russell@keith-magee.com>
Co-authored-by: Gregory P. Smith <greg@krypto.org>
---
Lib/socket.py | 121 +++++++++++++++++++---------------------
Lib/test/test_socket.py | 20 ++-----
2 files changed, 64 insertions(+), 77 deletions(-)
diff --git a/Lib/socket.py b/Lib/socket.py
index 591d4739a64..f386241abfb 100644
--- a/Lib/socket.py
+++ b/Lib/socket.py
@@ -590,16 +590,65 @@ def fromshare(info):
return socket(0, 0, 0, info)
__all__.append("fromshare")
-if hasattr(_socket, "socketpair"):
+# Origin: https://gist.github.com/4325783, by Geert Jansen. Public domain.
+# This is used if _socket doesn't natively provide socketpair. It's
+# always defined so that it can be patched in for testing purposes.
+def _fallback_socketpair(family=AF_INET, type=SOCK_STREAM, proto=0):
+ if family == AF_INET:
+ host = _LOCALHOST
+ elif family == AF_INET6:
+ host = _LOCALHOST_V6
+ else:
+ raise ValueError("Only AF_INET and AF_INET6 socket address families "
+ "are supported")
+ if type != SOCK_STREAM:
+ raise ValueError("Only SOCK_STREAM socket type is supported")
+ if proto != 0:
+ raise ValueError("Only protocol zero is supported")
+
+ # We create a connected TCP socket. Note the trick with
+ # setblocking(False) that prevents us from having to create a thread.
+ lsock = socket(family, type, proto)
+ try:
+ lsock.bind((host, 0))
+ lsock.listen()
+ # On IPv6, ignore flow_info and scope_id
+ addr, port = lsock.getsockname()[:2]
+ csock = socket(family, type, proto)
+ try:
+ csock.setblocking(False)
+ try:
+ csock.connect((addr, port))
+ except (BlockingIOError, InterruptedError):
+ pass
+ csock.setblocking(True)
+ ssock, _ = lsock.accept()
+ except:
+ csock.close()
+ raise
+ finally:
+ lsock.close()
- def socketpair(family=None, type=SOCK_STREAM, proto=0):
- """socketpair([family[, type[, proto]]]) -> (socket object, socket object)
+ # Authenticating avoids using a connection from something else
+ # able to connect to {host}:{port} instead of us.
+ # We expect only AF_INET and AF_INET6 families.
+ try:
+ if (
+ ssock.getsockname() != csock.getpeername()
+ or csock.getsockname() != ssock.getpeername()
+ ):
+ raise ConnectionError("Unexpected peer connection")
+ except:
+ # getsockname() and getpeername() can fail
+ # if either socket isn't connected.
+ ssock.close()
+ csock.close()
+ raise
- Create a pair of socket objects from the sockets returned by the platform
- socketpair() function.
- The arguments are the same as for socket() except the default family is
- AF_UNIX if defined on the platform; otherwise, the default is AF_INET.
- """
+ return (ssock, csock)
+
+if hasattr(_socket, "socketpair"):
+ def socketpair(family=None, type=SOCK_STREAM, proto=0):
if family is None:
try:
family = AF_UNIX
@@ -611,61 +660,7 @@ def socketpair(family=None, type=SOCK_STREAM, proto=0):
return a, b
else:
-
- # Origin: https://gist.github.com/4325783, by Geert Jansen. Public domain.
- def socketpair(family=AF_INET, type=SOCK_STREAM, proto=0):
- if family == AF_INET:
- host = _LOCALHOST
- elif family == AF_INET6:
- host = _LOCALHOST_V6
- else:
- raise ValueError("Only AF_INET and AF_INET6 socket address families "
- "are supported")
- if type != SOCK_STREAM:
- raise ValueError("Only SOCK_STREAM socket type is supported")
- if proto != 0:
- raise ValueError("Only protocol zero is supported")
-
- # We create a connected TCP socket. Note the trick with
- # setblocking(False) that prevents us from having to create a thread.
- lsock = socket(family, type, proto)
- try:
- lsock.bind((host, 0))
- lsock.listen()
- # On IPv6, ignore flow_info and scope_id
- addr, port = lsock.getsockname()[:2]
- csock = socket(family, type, proto)
- try:
- csock.setblocking(False)
- try:
- csock.connect((addr, port))
- except (BlockingIOError, InterruptedError):
- pass
- csock.setblocking(True)
- ssock, _ = lsock.accept()
- except:
- csock.close()
- raise
- finally:
- lsock.close()
-
- # Authenticating avoids using a connection from something else
- # able to connect to {host}:{port} instead of us.
- # We expect only AF_INET and AF_INET6 families.
- try:
- if (
- ssock.getsockname() != csock.getpeername()
- or csock.getsockname() != ssock.getpeername()
- ):
- raise ConnectionError("Unexpected peer connection")
- except:
- # getsockname() and getpeername() can fail
- # if either socket isn't connected.
- ssock.close()
- csock.close()
- raise
-
- return (ssock, csock)
+ socketpair = _fallback_socketpair
__all__.append("socketpair")
socketpair.__doc__ = """socketpair([family[, type[, proto]]]) -> (socket object, socket object)
diff --git a/Lib/test/test_socket.py b/Lib/test/test_socket.py
index a60eb436c7b..cc803d8753b 100644
--- a/Lib/test/test_socket.py
+++ b/Lib/test/test_socket.py
@@ -4676,7 +4676,6 @@ def _testSend(self):
class PurePythonSocketPairTest(SocketPairTest):
-
# Explicitly use socketpair AF_INET or AF_INET6 to ensure that is the
# code path we're using regardless platform is the pure python one where
# `_socket.socketpair` does not exist. (AF_INET does not work with
@@ -4691,28 +4690,21 @@ def socketpair(self):
# Local imports in this class make for easy security fix backporting.
def setUp(self):
- import _socket
- self._orig_sp = getattr(_socket, 'socketpair', None)
- if self._orig_sp is not None:
+ if hasattr(_socket, "socketpair"):
+ self._orig_sp = socket.socketpair
# This forces the version using the non-OS provided socketpair
# emulation via an AF_INET socket in Lib/socket.py.
- del _socket.socketpair
- import importlib
- global socket
- socket = importlib.reload(socket)
+ socket.socketpair = socket._fallback_socketpair
else:
- pass # This platform already uses the non-OS provided version.
+ # This platform already uses the non-OS provided version.
+ self._orig_sp = None
super().setUp()
def tearDown(self):
super().tearDown()
- import _socket
if self._orig_sp is not None:
# Restore the default socket.socketpair definition.
- _socket.socketpair = self._orig_sp
- import importlib
- global socket
- socket = importlib.reload(socket)
+ socket.socketpair = self._orig_sp
def test_recv(self):
msg = self.serv.recv(1024)
--
2.33.0

View File

@ -0,0 +1,247 @@
From d449caf8a179e3b954268b3a88eb9170be3c8fbf Mon Sep 17 00:00:00 2001
From: Seth Michael Larson <seth@python.org>
Date: Tue, 3 Sep 2024 10:07:13 -0500
Subject: [PATCH] [3.11] gh-121285: Remove backtracking when parsing tarfile
headers (GH-121286) (#123639)
* Remove backtracking when parsing tarfile headers
* Rewrite PAX header parsing to be stricter
* Optimize parsing of GNU extended sparse headers v0.0
(cherry picked from commit 34ddb64d088dd7ccc321f6103d23153256caa5d4)
Co-authored-by: Kirill Podoprigora <kirill.bast9@mail.ru>
Co-authored-by: Gregory P. Smith <greg@krypto.org>
---
Lib/tarfile.py | 105 +++++++++++-------
Lib/test/test_tarfile.py | 42 +++++++
...-07-02-13-39-20.gh-issue-121285.hrl-yI.rst | 2 +
3 files changed, 111 insertions(+), 38 deletions(-)
create mode 100644 Misc/NEWS.d/next/Security/2024-07-02-13-39-20.gh-issue-121285.hrl-yI.rst
diff --git a/Lib/tarfile.py b/Lib/tarfile.py
index 612217b1ad0..0d6b925533b 100755
--- a/Lib/tarfile.py
+++ b/Lib/tarfile.py
@@ -842,6 +842,9 @@ def data_filter(member, dest_path):
# Sentinel for replace() defaults, meaning "don't change the attribute"
_KEEP = object()
+# Header length is digits followed by a space.
+_header_length_prefix_re = re.compile(br"([0-9]{1,20}) ")
+
class TarInfo(object):
"""Informational class which holds the details about an
archive member given by a tar header block.
@@ -1411,41 +1414,59 @@ def _proc_pax(self, tarfile):
else:
pax_headers = tarfile.pax_headers.copy()
- # Check if the pax header contains a hdrcharset field. This tells us
- # the encoding of the path, linkpath, uname and gname fields. Normally,
- # these fields are UTF-8 encoded but since POSIX.1-2008 tar
- # implementations are allowed to store them as raw binary strings if
- # the translation to UTF-8 fails.
- match = re.search(br"\d+ hdrcharset=([^\n]+)\n", buf)
- if match is not None:
- pax_headers["hdrcharset"] = match.group(1).decode("utf-8")
-
- # For the time being, we don't care about anything other than "BINARY".
- # The only other value that is currently allowed by the standard is
- # "ISO-IR 10646 2000 UTF-8" in other words UTF-8.
- hdrcharset = pax_headers.get("hdrcharset")
- if hdrcharset == "BINARY":
- encoding = tarfile.encoding
- else:
- encoding = "utf-8"
-
# Parse pax header information. A record looks like that:
# "%d %s=%s\n" % (length, keyword, value). length is the size
# of the complete record including the length field itself and
- # the newline. keyword and value are both UTF-8 encoded strings.
- regex = re.compile(br"(\d+) ([^=]+)=")
+ # the newline.
pos = 0
- while True:
- match = regex.match(buf, pos)
- if not match:
- break
+ encoding = None
+ raw_headers = []
+ while len(buf) > pos and buf[pos] != 0x00:
+ if not (match := _header_length_prefix_re.match(buf, pos)):
+ raise InvalidHeaderError("invalid header")
+ try:
+ length = int(match.group(1))
+ except ValueError:
+ raise InvalidHeaderError("invalid header")
+ # Headers must be at least 5 bytes, shortest being '5 x=\n'.
+ # Value is allowed to be empty.
+ if length < 5:
+ raise InvalidHeaderError("invalid header")
+ if pos + length > len(buf):
+ raise InvalidHeaderError("invalid header")
- length, keyword = match.groups()
- length = int(length)
- if length == 0:
+ header_value_end_offset = match.start(1) + length - 1 # Last byte of the header
+ keyword_and_value = buf[match.end(1) + 1:header_value_end_offset]
+ raw_keyword, equals, raw_value = keyword_and_value.partition(b"=")
+
+ # Check the framing of the header. The last character must be '\n' (0x0A)
+ if not raw_keyword or equals != b"=" or buf[header_value_end_offset] != 0x0A:
raise InvalidHeaderError("invalid header")
- value = buf[match.end(2) + 1:match.start(1) + length - 1]
+ raw_headers.append((length, raw_keyword, raw_value))
+
+ # Check if the pax header contains a hdrcharset field. This tells us
+ # the encoding of the path, linkpath, uname and gname fields. Normally,
+ # these fields are UTF-8 encoded but since POSIX.1-2008 tar
+ # implementations are allowed to store them as raw binary strings if
+ # the translation to UTF-8 fails. For the time being, we don't care about
+ # anything other than "BINARY". The only other value that is currently
+ # allowed by the standard is "ISO-IR 10646 2000 UTF-8" in other words UTF-8.
+ # Note that we only follow the initial 'hdrcharset' setting to preserve
+ # the initial behavior of the 'tarfile' module.
+ if raw_keyword == b"hdrcharset" and encoding is None:
+ if raw_value == b"BINARY":
+ encoding = tarfile.encoding
+ else: # This branch ensures only the first 'hdrcharset' header is used.
+ encoding = "utf-8"
+
+ pos += length
+ # If no explicit hdrcharset is set, we use UTF-8 as a default.
+ if encoding is None:
+ encoding = "utf-8"
+
+ # After parsing the raw headers we can decode them to text.
+ for length, raw_keyword, raw_value in raw_headers:
# Normally, we could just use "utf-8" as the encoding and "strict"
# as the error handler, but we better not take the risk. For
# example, GNU tar <= 1.23 is known to store filenames it cannot
@@ -1453,17 +1474,16 @@ def _proc_pax(self, tarfile):
# hdrcharset=BINARY header).
# We first try the strict standard encoding, and if that fails we
# fall back on the user's encoding and error handler.
- keyword = self._decode_pax_field(keyword, "utf-8", "utf-8",
+ keyword = self._decode_pax_field(raw_keyword, "utf-8", "utf-8",
tarfile.errors)
if keyword in PAX_NAME_FIELDS:
- value = self._decode_pax_field(value, encoding, tarfile.encoding,
+ value = self._decode_pax_field(raw_value, encoding, tarfile.encoding,
tarfile.errors)
else:
- value = self._decode_pax_field(value, "utf-8", "utf-8",
+ value = self._decode_pax_field(raw_value, "utf-8", "utf-8",
tarfile.errors)
pax_headers[keyword] = value
- pos += length
# Fetch the next header.
try:
@@ -1478,7 +1498,7 @@ def _proc_pax(self, tarfile):
elif "GNU.sparse.size" in pax_headers:
# GNU extended sparse format version 0.0.
- self._proc_gnusparse_00(next, pax_headers, buf)
+ self._proc_gnusparse_00(next, raw_headers)
elif pax_headers.get("GNU.sparse.major") == "1" and pax_headers.get("GNU.sparse.minor") == "0":
# GNU extended sparse format version 1.0.
@@ -1500,15 +1520,24 @@ def _proc_pax(self, tarfile):
return next
- def _proc_gnusparse_00(self, next, pax_headers, buf):
+ def _proc_gnusparse_00(self, next, raw_headers):
"""Process a GNU tar extended sparse header, version 0.0.
"""
offsets = []
- for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf):
- offsets.append(int(match.group(1)))
numbytes = []
- for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf):
- numbytes.append(int(match.group(1)))
+ for _, keyword, value in raw_headers:
+ if keyword == b"GNU.sparse.offset":
+ try:
+ offsets.append(int(value.decode()))
+ except ValueError:
+ raise InvalidHeaderError("invalid header")
+
+ elif keyword == b"GNU.sparse.numbytes":
+ try:
+ numbytes.append(int(value.decode()))
+ except ValueError:
+ raise InvalidHeaderError("invalid header")
+
next.sparse = list(zip(offsets, numbytes))
def _proc_gnusparse_01(self, next, pax_headers):
diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py
index 389da7be3a3..c99c88ce93a 100644
--- a/Lib/test/test_tarfile.py
+++ b/Lib/test/test_tarfile.py
@@ -1208,6 +1208,48 @@ def test_pax_number_fields(self):
finally:
tar.close()
+ def test_pax_header_bad_formats(self):
+ # The fields from the pax header have priority over the
+ # TarInfo.
+ pax_header_replacements = (
+ b" foo=bar\n",
+ b"0 \n",
+ b"1 \n",
+ b"2 \n",
+ b"3 =\n",
+ b"4 =a\n",
+ b"1000000 foo=bar\n",
+ b"0 foo=bar\n",
+ b"-12 foo=bar\n",
+ b"000000000000000000000000036 foo=bar\n",
+ )
+ pax_headers = {"foo": "bar"}
+
+ for replacement in pax_header_replacements:
+ with self.subTest(header=replacement):
+ tar = tarfile.open(tmpname, "w", format=tarfile.PAX_FORMAT,
+ encoding="iso8859-1")
+ try:
+ t = tarfile.TarInfo()
+ t.name = "pax" # non-ASCII
+ t.uid = 1
+ t.pax_headers = pax_headers
+ tar.addfile(t)
+ finally:
+ tar.close()
+
+ with open(tmpname, "rb") as f:
+ data = f.read()
+ self.assertIn(b"11 foo=bar\n", data)
+ data = data.replace(b"11 foo=bar\n", replacement)
+
+ with open(tmpname, "wb") as f:
+ f.truncate()
+ f.write(data)
+
+ with self.assertRaisesRegex(tarfile.ReadError, r"method tar: ReadError\('invalid header'\)"):
+ tarfile.open(tmpname, encoding="iso8859-1")
+
class WriteTestBase(TarTest):
# Put all write tests in here that are supposed to be tested
diff --git a/Misc/NEWS.d/next/Security/2024-07-02-13-39-20.gh-issue-121285.hrl-yI.rst b/Misc/NEWS.d/next/Security/2024-07-02-13-39-20.gh-issue-121285.hrl-yI.rst
new file mode 100644
index 00000000000..81f918bfe2b
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2024-07-02-13-39-20.gh-issue-121285.hrl-yI.rst
@@ -0,0 +1,2 @@
+Remove backtracking from tarfile header parsing for ``hdrcharset``, PAX, and
+GNU sparse headers.
--
2.33.0

View File

@ -0,0 +1,312 @@
From ae961ae94bf19c8f8c7fbea3d1c25cc55ce8ae97 Mon Sep 17 00:00:00 2001
From: Victor Stinner <vstinner@python.org>
Date: Fri, 1 Nov 2024 14:11:47 +0100
Subject: [PATCH] [3.11] gh-124651: Quote template strings in `venv` activation
scripts (GH-124712) (GH-126185) (#126269)
---
Lib/test/test_venv.py | 83 ++++++++++++++++++-
Lib/venv/__init__.py | 42 ++++++++--
Lib/venv/scripts/common/activate | 8 +-
Lib/venv/scripts/nt/activate.bat | 6 +-
Lib/venv/scripts/posix/activate.csh | 8 +-
Lib/venv/scripts/posix/activate.fish | 8 +-
...-09-28-02-03-04.gh-issue-124651.bLBGtH.rst | 1 +
7 files changed, 135 insertions(+), 21 deletions(-)
create mode 100644 Misc/NEWS.d/next/Library/2024-09-28-02-03-04.gh-issue-124651.bLBGtH.rst
diff --git a/Lib/test/test_venv.py b/Lib/test/test_venv.py
index 9563282e6d9b9c..d3abb77f40f35f 100644
--- a/Lib/test/test_venv.py
+++ b/Lib/test/test_venv.py
@@ -17,7 +17,8 @@
import sys
import sysconfig
import tempfile
-from test.support import (captured_stdout, captured_stderr, requires_zlib,
+import shlex
+from test.support import (captured_stdout, captured_stderr,
skip_if_broken_multiprocessing_synchronize, verbose,
requires_subprocess, is_emscripten, is_wasi,
requires_venv_with_pip, TEST_HOME_DIR,
@@ -96,6 +97,10 @@ def get_text_file_contents(self, *args, encoding='utf-8'):
result = f.read()
return result
+ def assertEndsWith(self, string, tail):
+ if not string.endswith(tail):
+ self.fail(f"String {string!r} does not end with {tail!r}")
+
class BasicTest(BaseTest):
"""Test venv module functionality."""
@@ -446,6 +451,82 @@ def test_executable_symlinks(self):
'import sys; print(sys.executable)'])
self.assertEqual(out.strip(), envpy.encode())
+ # gh-124651: test quoted strings
+ @unittest.skipIf(os.name == 'nt', 'contains invalid characters on Windows')
+ def test_special_chars_bash(self):
+ """
+ Test that the template strings are quoted properly (bash)
+ """
+ rmtree(self.env_dir)
+ bash = shutil.which('bash')
+ if bash is None:
+ self.skipTest('bash required for this test')
+ env_name = '"\';&&$e|\'"'
+ env_dir = os.path.join(os.path.realpath(self.env_dir), env_name)
+ builder = venv.EnvBuilder(clear=True)
+ builder.create(env_dir)
+ activate = os.path.join(env_dir, self.bindir, 'activate')
+ test_script = os.path.join(self.env_dir, 'test_special_chars.sh')
+ with open(test_script, "w") as f:
+ f.write(f'source {shlex.quote(activate)}\n'
+ 'python -c \'import sys; print(sys.executable)\'\n'
+ 'python -c \'import os; print(os.environ["VIRTUAL_ENV"])\'\n'
+ 'deactivate\n')
+ out, err = check_output([bash, test_script])
+ lines = out.splitlines()
+ self.assertTrue(env_name.encode() in lines[0])
+ self.assertEndsWith(lines[1], env_name.encode())
+
+ # gh-124651: test quoted strings
+ @unittest.skipIf(os.name == 'nt', 'contains invalid characters on Windows')
+ def test_special_chars_csh(self):
+ """
+ Test that the template strings are quoted properly (csh)
+ """
+ rmtree(self.env_dir)
+ csh = shutil.which('tcsh') or shutil.which('csh')
+ if csh is None:
+ self.skipTest('csh required for this test')
+ env_name = '"\';&&$e|\'"'
+ env_dir = os.path.join(os.path.realpath(self.env_dir), env_name)
+ builder = venv.EnvBuilder(clear=True)
+ builder.create(env_dir)
+ activate = os.path.join(env_dir, self.bindir, 'activate.csh')
+ test_script = os.path.join(self.env_dir, 'test_special_chars.csh')
+ with open(test_script, "w") as f:
+ f.write(f'source {shlex.quote(activate)}\n'
+ 'python -c \'import sys; print(sys.executable)\'\n'
+ 'python -c \'import os; print(os.environ["VIRTUAL_ENV"])\'\n'
+ 'deactivate\n')
+ out, err = check_output([csh, test_script])
+ lines = out.splitlines()
+ self.assertTrue(env_name.encode() in lines[0])
+ self.assertEndsWith(lines[1], env_name.encode())
+
+ # gh-124651: test quoted strings on Windows
+ @unittest.skipUnless(os.name == 'nt', 'only relevant on Windows')
+ def test_special_chars_windows(self):
+ """
+ Test that the template strings are quoted properly on Windows
+ """
+ rmtree(self.env_dir)
+ env_name = "'&&^$e"
+ env_dir = os.path.join(os.path.realpath(self.env_dir), env_name)
+ builder = venv.EnvBuilder(clear=True)
+ builder.create(env_dir)
+ activate = os.path.join(env_dir, self.bindir, 'activate.bat')
+ test_batch = os.path.join(self.env_dir, 'test_special_chars.bat')
+ with open(test_batch, "w") as f:
+ f.write('@echo off\n'
+ f'"{activate}" & '
+ f'{self.exe} -c "import sys; print(sys.executable)" & '
+ f'{self.exe} -c "import os; print(os.environ[\'VIRTUAL_ENV\'])" & '
+ 'deactivate')
+ out, err = check_output([test_batch])
+ lines = out.splitlines()
+ self.assertTrue(env_name.encode() in lines[0])
+ self.assertEndsWith(lines[1], env_name.encode())
+
@unittest.skipUnless(os.name == 'nt', 'only relevant on Windows')
def test_unicode_in_batch_file(self):
"""
diff --git a/Lib/venv/__init__.py b/Lib/venv/__init__.py
index 6bce3081088200..4403f2b1c4ef60 100644
--- a/Lib/venv/__init__.py
+++ b/Lib/venv/__init__.py
@@ -11,6 +11,7 @@
import sys
import sysconfig
import types
+import shlex
CORE_VENV_DEPS = ('pip', 'setuptools')
@@ -394,11 +395,41 @@ def replace_variables(self, text, context):
:param context: The information for the environment creation request
being processed.
"""
- text = text.replace('__VENV_DIR__', context.env_dir)
- text = text.replace('__VENV_NAME__', context.env_name)
- text = text.replace('__VENV_PROMPT__', context.prompt)
- text = text.replace('__VENV_BIN_NAME__', context.bin_name)
- text = text.replace('__VENV_PYTHON__', context.env_exe)
+ replacements = {
+ '__VENV_DIR__': context.env_dir,
+ '__VENV_NAME__': context.env_name,
+ '__VENV_PROMPT__': context.prompt,
+ '__VENV_BIN_NAME__': context.bin_name,
+ '__VENV_PYTHON__': context.env_exe,
+ }
+
+ def quote_ps1(s):
+ """
+ This should satisfy PowerShell quoting rules [1], unless the quoted
+ string is passed directly to Windows native commands [2].
+ [1]: https://learn.microsoft.com/en-us/powershell/module/microsoft.powershell.core/about/about_quoting_rules
+ [2]: https://learn.microsoft.com/en-us/powershell/module/microsoft.powershell.core/about/about_parsing#passing-arguments-that-contain-quote-characters
+ """
+ s = s.replace("'", "''")
+ return f"'{s}'"
+
+ def quote_bat(s):
+ return s
+
+ # gh-124651: need to quote the template strings properly
+ quote = shlex.quote
+ script_path = context.script_path
+ if script_path.endswith('.ps1'):
+ quote = quote_ps1
+ elif script_path.endswith('.bat'):
+ quote = quote_bat
+ else:
+ # fallbacks to POSIX shell compliant quote
+ quote = shlex.quote
+
+ replacements = {key: quote(s) for key, s in replacements.items()}
+ for key, quoted in replacements.items():
+ text = text.replace(key, quoted)
return text
def install_scripts(self, context, path):
@@ -438,6 +469,7 @@ def install_scripts(self, context, path):
with open(srcfile, 'rb') as f:
data = f.read()
if not srcfile.endswith(('.exe', '.pdb')):
+ context.script_path = srcfile
try:
data = data.decode('utf-8')
data = self.replace_variables(data, context)
diff --git a/Lib/venv/scripts/common/activate b/Lib/venv/scripts/common/activate
index 982da08163b12d..e86442b3578342 100644
--- a/Lib/venv/scripts/common/activate
+++ b/Lib/venv/scripts/common/activate
@@ -35,11 +35,11 @@ deactivate () {
# unset irrelevant variables
deactivate nondestructive
-VIRTUAL_ENV="__VENV_DIR__"
+VIRTUAL_ENV=__VENV_DIR__
export VIRTUAL_ENV
_OLD_VIRTUAL_PATH="$PATH"
-PATH="$VIRTUAL_ENV/__VENV_BIN_NAME__:$PATH"
+PATH="$VIRTUAL_ENV/"__VENV_BIN_NAME__":$PATH"
export PATH
# unset PYTHONHOME if set
@@ -52,9 +52,9 @@ fi
if [ -z "${VIRTUAL_ENV_DISABLE_PROMPT:-}" ] ; then
_OLD_VIRTUAL_PS1="${PS1:-}"
- PS1="__VENV_PROMPT__${PS1:-}"
+ PS1=__VENV_PROMPT__"${PS1:-}"
export PS1
- VIRTUAL_ENV_PROMPT="__VENV_PROMPT__"
+ VIRTUAL_ENV_PROMPT=__VENV_PROMPT__
export VIRTUAL_ENV_PROMPT
fi
diff --git a/Lib/venv/scripts/nt/activate.bat b/Lib/venv/scripts/nt/activate.bat
index c1c3c82ee37f10..715b21b13fbe35 100644
--- a/Lib/venv/scripts/nt/activate.bat
+++ b/Lib/venv/scripts/nt/activate.bat
@@ -8,7 +8,7 @@
"%SystemRoot%\System32\chcp.com" 65001 > nul
)
-set VIRTUAL_ENV=__VENV_DIR__
+set "VIRTUAL_ENV=__VENV_DIR__"
if not defined PROMPT set PROMPT=$P$G
@@ -24,8 +24,8 @@
if defined _OLD_VIRTUAL_PATH set PATH=%_OLD_VIRTUAL_PATH%
if not defined _OLD_VIRTUAL_PATH set _OLD_VIRTUAL_PATH=%PATH%
-set PATH=%VIRTUAL_ENV%\__VENV_BIN_NAME__;%PATH%
-set VIRTUAL_ENV_PROMPT=__VENV_PROMPT__
+set "PATH=%VIRTUAL_ENV%\__VENV_BIN_NAME__;%PATH%"
+set "VIRTUAL_ENV_PROMPT=__VENV_PROMPT__"
:END
if defined _OLD_CODEPAGE (
diff --git a/Lib/venv/scripts/posix/activate.csh b/Lib/venv/scripts/posix/activate.csh
index d6f697c55ed81c..c47702127eff71 100644
--- a/Lib/venv/scripts/posix/activate.csh
+++ b/Lib/venv/scripts/posix/activate.csh
@@ -8,17 +8,17 @@ alias deactivate 'test $?_OLD_VIRTUAL_PATH != 0 && setenv PATH "$_OLD_VIRTUAL_PA
# Unset irrelevant variables.
deactivate nondestructive
-setenv VIRTUAL_ENV "__VENV_DIR__"
+setenv VIRTUAL_ENV __VENV_DIR__
set _OLD_VIRTUAL_PATH="$PATH"
-setenv PATH "$VIRTUAL_ENV/__VENV_BIN_NAME__:$PATH"
+setenv PATH "$VIRTUAL_ENV/"__VENV_BIN_NAME__":$PATH"
set _OLD_VIRTUAL_PROMPT="$prompt"
if (! "$?VIRTUAL_ENV_DISABLE_PROMPT") then
- set prompt = "__VENV_PROMPT__$prompt"
- setenv VIRTUAL_ENV_PROMPT "__VENV_PROMPT__"
+ set prompt = __VENV_PROMPT__"$prompt"
+ setenv VIRTUAL_ENV_PROMPT __VENV_PROMPT__
endif
alias pydoc python -m pydoc
diff --git a/Lib/venv/scripts/posix/activate.fish b/Lib/venv/scripts/posix/activate.fish
index 9aa4446005f4d8..dc3a6c88270c18 100644
--- a/Lib/venv/scripts/posix/activate.fish
+++ b/Lib/venv/scripts/posix/activate.fish
@@ -33,10 +33,10 @@ end
# Unset irrelevant variables.
deactivate nondestructive
-set -gx VIRTUAL_ENV "__VENV_DIR__"
+set -gx VIRTUAL_ENV __VENV_DIR__
set -gx _OLD_VIRTUAL_PATH $PATH
-set -gx PATH "$VIRTUAL_ENV/__VENV_BIN_NAME__" $PATH
+set -gx PATH "$VIRTUAL_ENV/"__VENV_BIN_NAME__ $PATH
# Unset PYTHONHOME if set.
if set -q PYTHONHOME
@@ -56,7 +56,7 @@ if test -z "$VIRTUAL_ENV_DISABLE_PROMPT"
set -l old_status $status
# Output the venv prompt; color taken from the blue of the Python logo.
- printf "%s%s%s" (set_color 4B8BBE) "__VENV_PROMPT__" (set_color normal)
+ printf "%s%s%s" (set_color 4B8BBE) __VENV_PROMPT__ (set_color normal)
# Restore the return status of the previous command.
echo "exit $old_status" | .
@@ -65,5 +65,5 @@ if test -z "$VIRTUAL_ENV_DISABLE_PROMPT"
end
set -gx _OLD_FISH_PROMPT_OVERRIDE "$VIRTUAL_ENV"
- set -gx VIRTUAL_ENV_PROMPT "__VENV_PROMPT__"
+ set -gx VIRTUAL_ENV_PROMPT __VENV_PROMPT__
end
diff --git a/Misc/NEWS.d/next/Library/2024-09-28-02-03-04.gh-issue-124651.bLBGtH.rst b/Misc/NEWS.d/next/Library/2024-09-28-02-03-04.gh-issue-124651.bLBGtH.rst
new file mode 100644
index 00000000000000..17fc9171390dd9
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-09-28-02-03-04.gh-issue-124651.bLBGtH.rst
@@ -0,0 +1 @@
+Properly quote template strings in :mod:`venv` activation scripts.

View File

@ -0,0 +1,137 @@
From a7084f6075c9595ba60119ce8c62f1496f50c568 Mon Sep 17 00:00:00 2001
From: "Miss Islington (bot)"
<31488909+miss-islington@users.noreply.github.com>
Date: Sun, 2 Feb 2025 09:30:28 +0100
Subject: [PATCH] [3.12] gh-105704: Disallow square brackets (`[` and `]`) in
domain names for parsed URLs (GH-129418) (GH-129527)
gh-105704: Disallow square brackets (`[` and `]`) in domain names for parsed URLs (GH-129418)
* gh-105704: Disallow square brackets (`[` and `]`) in domain names for parsed URLs
* Use Sphinx references
* Add mismatched bracket test cases, fix news format
* Add more test coverage for ports
---------
(cherry picked from commit d89a5f6a6e65511a5f6e0618c4c30a7aa5aba56a)
Co-authored-by: Seth Michael Larson <seth@python.org>
Co-authored-by: Peter Bierma <zintensitydev@gmail.com>
---
Lib/test/test_urlparse.py | 37 ++++++++++++++++++-
Lib/urllib/parse.py | 20 +++++++++-
...-01-28-14-08-03.gh-issue-105704.EnhHxu.rst | 4 ++
3 files changed, 58 insertions(+), 3 deletions(-)
create mode 100644 Misc/NEWS.d/next/Security/2025-01-28-14-08-03.gh-issue-105704.EnhHxu.rst
diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
index 818e7e93dbbe11..5e429b9259fee7 100644
--- a/Lib/test/test_urlparse.py
+++ b/Lib/test/test_urlparse.py
@@ -1273,16 +1273,51 @@ def test_invalid_bracketed_hosts(self):
self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[0439:23af::2309::fae7:1234]/Path?Query')
self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[0439:23af:2309::fae7:1234:2342:438e:192.0.2.146]/Path?Query')
self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@]v6a.ip[/Path')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix.[v6a.ip]')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://[v6a.ip].suffix')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix.[v6a.ip]/')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://[v6a.ip].suffix/')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix.[v6a.ip]?')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://[v6a.ip].suffix?')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix.[::1]')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://[::1].suffix')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix.[::1]/')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://[::1].suffix/')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix.[::1]?')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://[::1].suffix?')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix.[::1]:a')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://[::1].suffix:a')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix.[::1]:a1')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://[::1].suffix:a1')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix.[::1]:1a')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://[::1].suffix:1a')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix.[::1]:')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://[::1].suffix:/')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix.[::1]:?')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://user@prefix.[v6a.ip]')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://user@[v6a.ip].suffix')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://[v6a.ip')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://v6a.ip]')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://]v6a.ip[')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://]v6a.ip')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://v6a.ip[')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix.[v6a.ip')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://v6a.ip].suffix')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix]v6a.ip[suffix')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix]v6a.ip')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://v6a.ip[suffix')
def test_splitting_bracketed_hosts(self):
- p1 = urllib.parse.urlsplit('scheme://user@[v6a.ip]/path?query')
+ p1 = urllib.parse.urlsplit('scheme://user@[v6a.ip]:1234/path?query')
self.assertEqual(p1.hostname, 'v6a.ip')
self.assertEqual(p1.username, 'user')
self.assertEqual(p1.path, '/path')
+ self.assertEqual(p1.port, 1234)
p2 = urllib.parse.urlsplit('scheme://user@[0439:23af:2309::fae7%test]/path?query')
self.assertEqual(p2.hostname, '0439:23af:2309::fae7%test')
self.assertEqual(p2.username, 'user')
self.assertEqual(p2.path, '/path')
+ self.assertIs(p2.port, None)
p3 = urllib.parse.urlsplit('scheme://user@[0439:23af:2309::fae7:1234:192.0.2.146%test]/path?query')
self.assertEqual(p3.hostname, '0439:23af:2309::fae7:1234:192.0.2.146%test')
self.assertEqual(p3.username, 'user')
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
index 24815952037fef..c72138a33ca6d4 100644
--- a/Lib/urllib/parse.py
+++ b/Lib/urllib/parse.py
@@ -436,6 +436,23 @@ def _checknetloc(netloc):
raise ValueError("netloc '" + netloc + "' contains invalid " +
"characters under NFKC normalization")
+def _check_bracketed_netloc(netloc):
+ # Note that this function must mirror the splitting
+ # done in NetlocResultMixins._hostinfo().
+ hostname_and_port = netloc.rpartition('@')[2]
+ before_bracket, have_open_br, bracketed = hostname_and_port.partition('[')
+ if have_open_br:
+ # No data is allowed before a bracket.
+ if before_bracket:
+ raise ValueError("Invalid IPv6 URL")
+ hostname, _, port = bracketed.partition(']')
+ # No data is allowed after the bracket but before the port delimiter.
+ if port and not port.startswith(":"):
+ raise ValueError("Invalid IPv6 URL")
+ else:
+ hostname, _, port = hostname_and_port.partition(':')
+ _check_bracketed_host(hostname)
+
# Valid bracketed hosts are defined in
# https://www.rfc-editor.org/rfc/rfc3986#page-49 and https://url.spec.whatwg.org/
def _check_bracketed_host(hostname):
@@ -496,8 +513,7 @@ def urlsplit(url, scheme='', allow_fragments=True):
(']' in netloc and '[' not in netloc)):
raise ValueError("Invalid IPv6 URL")
if '[' in netloc and ']' in netloc:
- bracketed_host = netloc.partition('[')[2].partition(']')[0]
- _check_bracketed_host(bracketed_host)
+ _check_bracketed_netloc(netloc)
if allow_fragments and '#' in url:
url, fragment = url.split('#', 1)
if '?' in url:
diff --git a/Misc/NEWS.d/next/Security/2025-01-28-14-08-03.gh-issue-105704.EnhHxu.rst b/Misc/NEWS.d/next/Security/2025-01-28-14-08-03.gh-issue-105704.EnhHxu.rst
new file mode 100644
index 00000000000000..bff1bc6b0d609c
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2025-01-28-14-08-03.gh-issue-105704.EnhHxu.rst
@@ -0,0 +1,4 @@
+When using :func:`urllib.parse.urlsplit` and :func:`urllib.parse.urlparse` host
+parsing would not reject domain names containing square brackets (``[`` and
+``]``). Square brackets are only valid for IPv6 and IPvFuture hosts according to
+`RFC 3986 Section 3.2.2 <https://www.rfc-editor.org/rfc/rfc3986#section-3.2.2>`__.

View File

@ -0,0 +1,265 @@
From f4529f55d3ea4e68f90fbdd2e8a37c43986aa1bf Mon Sep 17 00:00:00 2001
From: Thomas Dwyer <github@tomd.tel>
Date: Mon, 10 Jul 2023 18:00:55 -0500
Subject: [PATCH] [Backport] Fix parsing errors in email/_parseaddr.py
Reference: https://github.com/python/cpython/issues/102988
The e-mail module of Python 0 - 2.7.18, 3.x - 3.11 incorrectly parses e-mail addresses which contain a special character. This vulnerability allows attackers to send messages from e-mail addresses that would otherwise be rejected.
Offering: CloudBu CMP
CVE: CVE-2023-27043
---
Doc/library/email.utils.rst | 26 +++++-
Lib/email/utils.py | 61 ++++++++++++--
Lib/test/test_email/test_email.py | 81 ++++++++++++++++++-
...-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst | 4 +
4 files changed, 162 insertions(+), 10 deletions(-)
create mode 100644 Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst
diff --git a/Doc/library/email.utils.rst b/Doc/library/email.utils.rst
index 0e266b6..06f8169 100644
--- a/Doc/library/email.utils.rst
+++ b/Doc/library/email.utils.rst
@@ -67,6 +67,11 @@ of the new API.
*email address* parts. Returns a tuple of that information, unless the parse
fails, in which case a 2-tuple of ``('', '')`` is returned.
+ .. versionchanged:: 3.12
+ For security reasons, addresses that were ambiguous and could parse into
+ multiple different addresses now cause ``('', '')`` to be returned
+ instead of only one of the *potential* addresses.
+
.. function:: formataddr(pair, charset='utf-8')
@@ -89,7 +94,7 @@ of the new API.
This method returns a list of 2-tuples of the form returned by ``parseaddr()``.
*fieldvalues* is a sequence of header field values as might be returned by
:meth:`Message.get_all <email.message.Message.get_all>`. Here's a simple
- example that gets all the recipients of a message::
+ example that gets all the recipients of a message:
from email.utils import getaddresses
@@ -99,6 +104,25 @@ of the new API.
resent_ccs = msg.get_all('resent-cc', [])
all_recipients = getaddresses(tos + ccs + resent_tos + resent_ccs)
+ When parsing fails for a single fieldvalue, a 2-tuple of ``('', '')``
+ is returned in its place. Other errors in parsing the list of
+ addresses such as a fieldvalue seemingly parsing into multiple
+ addresses may result in a list containing a single empty 2-tuple
+ ``[('', '')]`` being returned rather than returning potentially
+ invalid output.
+
+ Example malformed input parsing:
+
+ .. doctest::
+
+ >>> from email.utils import getaddresses
+ >>> getaddresses(['alice@example.com <bob@example.com>', 'me@example.com'])
+ [('', '')]
+
+ .. versionchanged:: 3.12
+ The 2-tuple of ``('', '')`` in the returned values when parsing
+ fails were added as to address a security issue.
+
.. function:: parsedate(date)
diff --git a/Lib/email/utils.py b/Lib/email/utils.py
index cfdfeb3..f8e867a 100644
--- a/Lib/email/utils.py
+++ b/Lib/email/utils.py
@@ -105,13 +105,53 @@ def formataddr(pair, charset='utf-8'):
return '%s%s%s <%s>' % (quotes, name, quotes, address)
return address
+def _pre_parse_validation(email_header_fields):
+ accepted_values = []
+ for v in email_header_fields:
+ s = v.replace('\\(', '').replace('\\)', '')
+ if s.count('(') != s.count(')'):
+ v = "('', '')"
+ accepted_values.append(v)
+ return accepted_values
+
+
+def _post_parse_validation(parsed_email_header_tuples):
+ accepted_values = []
+ # The parser would have parsed a correctly formatted domain-literal
+ # The existence of an [ after parsing indicates a parsing failure
+ for v in parsed_email_header_tuples:
+ if '[' in v[1]:
+ v = ('', '')
+ accepted_values.append(v)
+
+ return accepted_values
def getaddresses(fieldvalues):
- """Return a list of (REALNAME, EMAIL) for each fieldvalue."""
- all = COMMASPACE.join(str(v) for v in fieldvalues)
+ """Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue.
+
+ When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in
+ its place.
+
+ If the resulting list of parsed address is not the same as the number of
+ fieldvalues in the input list a parsing error has occurred. A list
+ containing a single empty 2-tuple [('', '')] is returned in its place.
+ This is done to avoid invalid output.
+ """
+ fieldvalues = [str(v) for v in fieldvalues]
+ fieldvalues = _pre_parse_validation(fieldvalues)
+ all = COMMASPACE.join(v for v in fieldvalues)
a = _AddressList(all)
- return a.addresslist
+ result = _post_parse_validation(a.addresslist)
+
+ n = 0
+ for v in fieldvalues:
+ n += v.count(',') + 1
+
+ if len(result) != n:
+ return [('', '')]
+
+ return result
def _format_timetuple_and_zone(timetuple, zone):
@@ -212,9 +252,18 @@ def parseaddr(addr):
Return a tuple of realname and email address, unless the parse fails, in
which case return a 2-tuple of ('', '').
"""
- addrs = _AddressList(addr).addresslist
- if not addrs:
- return '', ''
+ if isinstance(addr, list):
+ addr = addr[0]
+
+ if not isinstance(addr, str):
+ return ('', '')
+
+ addr = _pre_parse_validation([addr])[0]
+ addrs = _post_parse_validation(_AddressList(addr).addresslist)
+
+ if not addrs or len(addrs) > 1:
+ return ('', '')
+
return addrs[0]
diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py
index 677f209..f43d586 100644
--- a/Lib/test/test_email/test_email.py
+++ b/Lib/test/test_email/test_email.py
@@ -3321,15 +3321,90 @@ Foo
[('Al Person', 'aperson@dom.ain'),
('Bud Person', 'bperson@dom.ain')])
+ def test_getaddresses_parsing_errors(self):
+ """Test for parsing errors from CVE-2023-27043"""
+ eq = self.assertEqual
+ eq(utils.getaddresses(['alice@example.org(<bob@example.com>']),
+ [('', '')])
+ eq(utils.getaddresses(['alice@example.org)<bob@example.com>']),
+ [('', '')])
+ eq(utils.getaddresses(['alice@example.org<<bob@example.com>']),
+ [('', '')])
+ eq(utils.getaddresses(['alice@example.org><bob@example.com>']),
+ [('', '')])
+ eq(utils.getaddresses(['alice@example.org@<bob@example.com>']),
+ [('', '')])
+ eq(utils.getaddresses(['alice@example.org,<bob@example.com>']),
+ [('', 'alice@example.org'), ('', 'bob@example.com')])
+ eq(utils.getaddresses(['alice@example.org;<bob@example.com>']),
+ [('', '')])
+ eq(utils.getaddresses(['alice@example.org:<bob@example.com>']),
+ [('', '')])
+ eq(utils.getaddresses(['alice@example.org.<bob@example.com>']),
+ [('', '')])
+ eq(utils.getaddresses(['alice@example.org"<bob@example.com>']),
+ [('', '')])
+ eq(utils.getaddresses(['alice@example.org[<bob@example.com>']),
+ [('', '')])
+ eq(utils.getaddresses(['alice@example.org]<bob@example.com>']),
+ [('', '')])
+
+ def test_parseaddr_parsing_errors(self):
+ """Test for parsing errors from CVE-2023-27043"""
+ eq = self.assertEqual
+ eq(utils.parseaddr(['alice@example.org(<bob@example.com>']),
+ ('', ''))
+ eq(utils.parseaddr(['alice@example.org)<bob@example.com>']),
+ ('', ''))
+ eq(utils.parseaddr(['alice@example.org<<bob@example.com>']),
+ ('', ''))
+ eq(utils.parseaddr(['alice@example.org><bob@example.com>']),
+ ('', ''))
+ eq(utils.parseaddr(['alice@example.org@<bob@example.com>']),
+ ('', ''))
+ eq(utils.parseaddr(['alice@example.org,<bob@example.com>']),
+ ('', ''))
+ eq(utils.parseaddr(['alice@example.org;<bob@example.com>']),
+ ('', ''))
+ eq(utils.parseaddr(['alice@example.org:<bob@example.com>']),
+ ('', ''))
+ eq(utils.parseaddr(['alice@example.org.<bob@example.com>']),
+ ('', ''))
+ eq(utils.parseaddr(['alice@example.org"<bob@example.com>']),
+ ('', ''))
+ eq(utils.parseaddr(['alice@example.org[<bob@example.com>']),
+ ('', ''))
+ eq(utils.parseaddr(['alice@example.org]<bob@example.com>']),
+ ('', ''))
+
def test_getaddresses_nasty(self):
eq = self.assertEqual
eq(utils.getaddresses(['foo: ;']), [('', '')])
- eq(utils.getaddresses(
- ['[]*-- =~$']),
- [('', ''), ('', ''), ('', '*--')])
+ eq(utils.getaddresses(['[]*-- =~$']), [('', '')])
eq(utils.getaddresses(
['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
[('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
+ eq(utils.getaddresses(
+ [r'Pete(A nice \) chap) <pete(his account)@silly.test(his host)>']),
+ [('Pete (A nice ) chap his account his host)', 'pete@silly.test')])
+ eq(utils.getaddresses(
+ ['(Empty list)(start)Undisclosed recipients :(nobody(I know))']),
+ [('', '')])
+ eq(utils.getaddresses(
+ ['Mary <@machine.tld:mary@example.net>, , jdoe@test . example']),
+ [('Mary', 'mary@example.net'), ('', ''), ('', 'jdoe@test.example')])
+ eq(utils.getaddresses(
+ ['John Doe <jdoe@machine(comment). example>']),
+ [('John Doe (comment)', 'jdoe@machine.example')])
+ eq(utils.getaddresses(
+ ['"Mary Smith: Personal Account" <smith@home.example>']),
+ [('Mary Smith: Personal Account', 'smith@home.example')])
+ eq(utils.getaddresses(
+ ['Undisclosed recipients:;']),
+ [('', '')])
+ eq(utils.getaddresses(
+ [r'<boss@nil.test>, "Giant; \"Big\" Box" <bob@example.net>']),
+ [('', 'boss@nil.test'), ('Giant; "Big" Box', 'bob@example.net')])
def test_getaddresses_embedded_comment(self):
"""Test proper handling of a nested comment"""
diff --git a/Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst b/Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst
new file mode 100644
index 0000000..e0434cc
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst
@@ -0,0 +1,4 @@
+CVE-2023-27043: Prevent :func:`email.utils.parseaddr`
+and :func:`email.utils.getaddresses` from returning the realname portion of an
+invalid RFC2822 email header in the email address portion of the 2-tuple
+returned after being parsed by :class:`email._parseaddr.AddressList`.
--
2.27.0

View File

@ -0,0 +1,284 @@
From a6f6faf3b669c409804fcaf4e62dd9bd257d2178 Mon Sep 17 00:00:00 2001
From: "Gregory P. Smith" <greg@krypto.org>
Date: Thu, 20 Jul 2023 20:30:52 -0700
Subject: [PATCH] [Backport] Revert fixes for CVE-2023-27043
Reference: https://github.com/python/cpython/pull/106733
Revert "gh-102988: Detect email address parsing errors and return empty tuple to indicate the parsing error (old API) (#105127)"
This reverts commit and adds the regression test suggested in the issue.
Offering: CloudBu CMP
CVE: CVE-2023-27043
---
Doc/library/email.utils.rst | 26 +----
Lib/email/utils.py | 63 ++----------
Lib/test/test_email/test_email.py | 96 ++++---------------
...-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst | 8 +-
4 files changed, 30 insertions(+), 163 deletions(-)
diff --git a/Doc/library/email.utils.rst b/Doc/library/email.utils.rst
index 06f8169..0e266b6 100644
--- a/Doc/library/email.utils.rst
+++ b/Doc/library/email.utils.rst
@@ -67,11 +67,6 @@ of the new API.
*email address* parts. Returns a tuple of that information, unless the parse
fails, in which case a 2-tuple of ``('', '')`` is returned.
- .. versionchanged:: 3.12
- For security reasons, addresses that were ambiguous and could parse into
- multiple different addresses now cause ``('', '')`` to be returned
- instead of only one of the *potential* addresses.
-
.. function:: formataddr(pair, charset='utf-8')
@@ -94,7 +89,7 @@ of the new API.
This method returns a list of 2-tuples of the form returned by ``parseaddr()``.
*fieldvalues* is a sequence of header field values as might be returned by
:meth:`Message.get_all <email.message.Message.get_all>`. Here's a simple
- example that gets all the recipients of a message:
+ example that gets all the recipients of a message::
from email.utils import getaddresses
@@ -104,25 +99,6 @@ of the new API.
resent_ccs = msg.get_all('resent-cc', [])
all_recipients = getaddresses(tos + ccs + resent_tos + resent_ccs)
- When parsing fails for a single fieldvalue, a 2-tuple of ``('', '')``
- is returned in its place. Other errors in parsing the list of
- addresses such as a fieldvalue seemingly parsing into multiple
- addresses may result in a list containing a single empty 2-tuple
- ``[('', '')]`` being returned rather than returning potentially
- invalid output.
-
- Example malformed input parsing:
-
- .. doctest::
-
- >>> from email.utils import getaddresses
- >>> getaddresses(['alice@example.com <bob@example.com>', 'me@example.com'])
- [('', '')]
-
- .. versionchanged:: 3.12
- The 2-tuple of ``('', '')`` in the returned values when parsing
- fails were added as to address a security issue.
-
.. function:: parsedate(date)
diff --git a/Lib/email/utils.py b/Lib/email/utils.py
index f8e867a..95620b7 100644
--- a/Lib/email/utils.py
+++ b/Lib/email/utils.py
@@ -105,53 +105,11 @@ def formataddr(pair, charset='utf-8'):
return '%s%s%s <%s>' % (quotes, name, quotes, address)
return address
-def _pre_parse_validation(email_header_fields):
- accepted_values = []
- for v in email_header_fields:
- s = v.replace('\\(', '').replace('\\)', '')
- if s.count('(') != s.count(')'):
- v = "('', '')"
- accepted_values.append(v)
-
- return accepted_values
-
-
-def _post_parse_validation(parsed_email_header_tuples):
- accepted_values = []
- # The parser would have parsed a correctly formatted domain-literal
- # The existence of an [ after parsing indicates a parsing failure
- for v in parsed_email_header_tuples:
- if '[' in v[1]:
- v = ('', '')
- accepted_values.append(v)
-
- return accepted_values
-
def getaddresses(fieldvalues):
- """Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue.
-
- When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in
- its place.
-
- If the resulting list of parsed address is not the same as the number of
- fieldvalues in the input list a parsing error has occurred. A list
- containing a single empty 2-tuple [('', '')] is returned in its place.
- This is done to avoid invalid output.
- """
- fieldvalues = [str(v) for v in fieldvalues]
- fieldvalues = _pre_parse_validation(fieldvalues)
- all = COMMASPACE.join(v for v in fieldvalues)
+ """Return a list of (REALNAME, EMAIL) for each fieldvalue."""
+ all = COMMASPACE.join(str(v) for v in fieldvalues)
a = _AddressList(all)
- result = _post_parse_validation(a.addresslist)
-
- n = 0
- for v in fieldvalues:
- n += v.count(',') + 1
-
- if len(result) != n:
- return [('', '')]
-
- return result
+ return a.addresslist
def _format_timetuple_and_zone(timetuple, zone):
@@ -252,18 +210,9 @@ def parseaddr(addr):
Return a tuple of realname and email address, unless the parse fails, in
which case return a 2-tuple of ('', '').
"""
- if isinstance(addr, list):
- addr = addr[0]
-
- if not isinstance(addr, str):
- return ('', '')
-
- addr = _pre_parse_validation([addr])[0]
- addrs = _post_parse_validation(_AddressList(addr).addresslist)
-
- if not addrs or len(addrs) > 1:
- return ('', '')
-
+ addrs = _AddressList(addr).addresslist
+ if not addrs:
+ return '', ''
return addrs[0]
diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py
index f43d586..473a488 100644
--- a/Lib/test/test_email/test_email.py
+++ b/Lib/test/test_email/test_email.py
@@ -3321,90 +3321,32 @@ Foo
[('Al Person', 'aperson@dom.ain'),
('Bud Person', 'bperson@dom.ain')])
- def test_getaddresses_parsing_errors(self):
- """Test for parsing errors from CVE-2023-27043"""
- eq = self.assertEqual
- eq(utils.getaddresses(['alice@example.org(<bob@example.com>']),
- [('', '')])
- eq(utils.getaddresses(['alice@example.org)<bob@example.com>']),
- [('', '')])
- eq(utils.getaddresses(['alice@example.org<<bob@example.com>']),
- [('', '')])
- eq(utils.getaddresses(['alice@example.org><bob@example.com>']),
- [('', '')])
- eq(utils.getaddresses(['alice@example.org@<bob@example.com>']),
- [('', '')])
- eq(utils.getaddresses(['alice@example.org,<bob@example.com>']),
- [('', 'alice@example.org'), ('', 'bob@example.com')])
- eq(utils.getaddresses(['alice@example.org;<bob@example.com>']),
- [('', '')])
- eq(utils.getaddresses(['alice@example.org:<bob@example.com>']),
- [('', '')])
- eq(utils.getaddresses(['alice@example.org.<bob@example.com>']),
- [('', '')])
- eq(utils.getaddresses(['alice@example.org"<bob@example.com>']),
- [('', '')])
- eq(utils.getaddresses(['alice@example.org[<bob@example.com>']),
- [('', '')])
- eq(utils.getaddresses(['alice@example.org]<bob@example.com>']),
- [('', '')])
-
- def test_parseaddr_parsing_errors(self):
- """Test for parsing errors from CVE-2023-27043"""
- eq = self.assertEqual
- eq(utils.parseaddr(['alice@example.org(<bob@example.com>']),
- ('', ''))
- eq(utils.parseaddr(['alice@example.org)<bob@example.com>']),
- ('', ''))
- eq(utils.parseaddr(['alice@example.org<<bob@example.com>']),
- ('', ''))
- eq(utils.parseaddr(['alice@example.org><bob@example.com>']),
- ('', ''))
- eq(utils.parseaddr(['alice@example.org@<bob@example.com>']),
- ('', ''))
- eq(utils.parseaddr(['alice@example.org,<bob@example.com>']),
- ('', ''))
- eq(utils.parseaddr(['alice@example.org;<bob@example.com>']),
- ('', ''))
- eq(utils.parseaddr(['alice@example.org:<bob@example.com>']),
- ('', ''))
- eq(utils.parseaddr(['alice@example.org.<bob@example.com>']),
- ('', ''))
- eq(utils.parseaddr(['alice@example.org"<bob@example.com>']),
- ('', ''))
- eq(utils.parseaddr(['alice@example.org[<bob@example.com>']),
- ('', ''))
- eq(utils.parseaddr(['alice@example.org]<bob@example.com>']),
- ('', ''))
+ def test_getaddresses_comma_in_name(self):
+ """GH-106669 regression test."""
+ self.assertEqual(
+ utils.getaddresses(
+ [
+ '"Bud, Person" <bperson@dom.ain>',
+ 'aperson@dom.ain (Al Person)',
+ '"Mariusz Felisiak" <to@example.com>',
+ ]
+ ),
+ [
+ ('Bud, Person', 'bperson@dom.ain'),
+ ('Al Person', 'aperson@dom.ain'),
+ ('Mariusz Felisiak', 'to@example.com'),
+ ],
+ )
def test_getaddresses_nasty(self):
eq = self.assertEqual
eq(utils.getaddresses(['foo: ;']), [('', '')])
- eq(utils.getaddresses(['[]*-- =~$']), [('', '')])
+ eq(utils.getaddresses(
+ ['[]*-- =~$']),
+ [('', ''), ('', ''), ('', '*--')])
eq(utils.getaddresses(
['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
[('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
- eq(utils.getaddresses(
- [r'Pete(A nice \) chap) <pete(his account)@silly.test(his host)>']),
- [('Pete (A nice ) chap his account his host)', 'pete@silly.test')])
- eq(utils.getaddresses(
- ['(Empty list)(start)Undisclosed recipients :(nobody(I know))']),
- [('', '')])
- eq(utils.getaddresses(
- ['Mary <@machine.tld:mary@example.net>, , jdoe@test . example']),
- [('Mary', 'mary@example.net'), ('', ''), ('', 'jdoe@test.example')])
- eq(utils.getaddresses(
- ['John Doe <jdoe@machine(comment). example>']),
- [('John Doe (comment)', 'jdoe@machine.example')])
- eq(utils.getaddresses(
- ['"Mary Smith: Personal Account" <smith@home.example>']),
- [('Mary Smith: Personal Account', 'smith@home.example')])
- eq(utils.getaddresses(
- ['Undisclosed recipients:;']),
- [('', '')])
- eq(utils.getaddresses(
- [r'<boss@nil.test>, "Giant; \"Big\" Box" <bob@example.net>']),
- [('', 'boss@nil.test'), ('Giant; "Big" Box', 'bob@example.net')])
def test_getaddresses_embedded_comment(self):
"""Test proper handling of a nested comment"""
diff --git a/Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst b/Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst
index e0434cc..c67ec45 100644
--- a/Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst
+++ b/Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst
@@ -1,4 +1,4 @@
-CVE-2023-27043: Prevent :func:`email.utils.parseaddr`
-and :func:`email.utils.getaddresses` from returning the realname portion of an
-invalid RFC2822 email header in the email address portion of the 2-tuple
-returned after being parsed by :class:`email._parseaddr.AddressList`.
+Reverted the :mod:`email.utils` security improvement change released in
+3.12beta4 that unintentionally caused :mod:`email.utils.getaddresses` to fail
+to parse email addresses with a comma in the quoted name field.
+See :gh:`106669`.
--
2.27.0

View File

@ -3,7 +3,7 @@ Summary: Interpreter of the Python3 programming language
URL: https://www.python.org/
Version: 3.11.6
Release: 6
Release: 11
License: Python-2.0
%global branchversion 3.11
@ -88,15 +88,26 @@ Source1: pyconfig.h
Patch1: 00001-rpath.patch
Patch251: 00251-change-user-install-location.patch
Patch6000: backport-3.11-gh-114572-Fix-locking-in-cert_store_stats-and-g.patch
Patch6001: backport-3.11-gh-113171-gh-65056-Fix-private-non-global-IP-ad.patch
Patch6000: backport-CVE-2024-0397-gh-114572-Fix-locking-in-cert_store_stats-and-g.patch
Patch6001: backport-CVE-2024-4032-gh-113171-gh-65056-Fix-private-non-global-IP-ad.patch
Patch6002: backport-fix_xml_tree_assert_error.patch
Patch6003: backport-gh-121650-Encode-newlines-in-headers-and-verify-head.patch
Patch6004: backport-gh-123067-Fix-quadratic-complexity-in-parsing-quoted.patch
Patch6005: backport-gh-123270-Replaced-SanitizedNames-with-a-more-surgic.patch
Patch6003: backport-CVE-2024-6923-gh-121650-Encode-newlines-in-headers-and-verify-head.patch
Patch6004: backport-CVE-2024-7592-gh-123067-Fix-quadratic-complexity-in-parsing-quoted.patch
Patch6005: backport-CVE-2024-8088-gh-123270-Replaced-SanitizedNames-with-a-more-surgic.patch
Patch6006: backport-CVE-2024-6232-gh-121285-Remove-backtracking-when-parsing-tarf.patch
Patch6007: backport-CVE-2024-3219-1-gh-122133-Authenticate-socket-connection-for-so.patch
Patch6008: backport-CVE-2024-3219-2-gh-122133-Rework-pure-Python-socketpair-tests-t.patch
Patch6009: backport-CVE-2023-6597-gh-91133-tempfile.TemporaryDirectory-fix-symlin.patch
Patch6010: backport-CVE-2024-0450-gh-109858-Protect-zipfile-from-quoted-overlap-z.patch
Patch6011: backport-CVE-2024-9287.patch
Patch6012: backport-Fix-parsing-errors-in-email-_parseaddr.py.patch
Patch6013: backport-Revert-fixes-for-CVE-2023-27043.patch
Patch6014: backport-CVE-2023-27043.patch
Patch6015: backport-CVE-2025-0938.patch
Patch9000: add-the-sm3-method-for-obtaining-the-salt-value.patch
Patch9001: 0001-add-loongarch64-support-for-python.patch
Patch9002: 0001-expected_algs-list-to-include-TLS_SM4.patch
Provides: python%{branchversion} = %{version}-%{release}
Provides: python(abi) = %{branchversion}
@ -189,19 +200,29 @@ rm -r Modules/expat
rm Lib/ensurepip/_bundled/*.whl
rm configure pyconfig.h.in
%patch1 -p1
%patch251 -p1
%patch -P1 -p1
%patch -P251 -p1
%patch6000 -p1
%patch6001 -p1
%patch6002 -p1
%patch6003 -p1
%patch6004 -p1
%patch6005 -p1
%patch -P6000 -p1
%patch -P6001 -p1
%patch -P6002 -p1
%patch -P6003 -p1
%patch -P6004 -p1
%patch -P6005 -p1
%patch -P6006 -p1
%patch -P6007 -p1
%patch -P6008 -p1
%patch -P6009 -p1
%patch -P6010 -p1
%patch -P6011 -p1
%patch -P6012 -p1
%patch -P6013 -p1
%patch -P6014 -p1
%patch -P6015 -p1
%patch9000 -p1
%patch9001 -p1
%patch -P9000 -p1
%patch -P9001 -p1
%patch -P9002 -p1
%build
autoconf
@ -863,6 +884,43 @@ export BEP_GTDLIST="$BEP_GTDLIST_TMP"
%{_mandir}/*/*
%changelog
* Tue Feb 11 2025 Funda Wang <fundawang@yeah.net> - 3.11.6-11
- Type:CVE
- CVE:CVE-2025-0938
- SUG:NA
- DESC:fix CVE-2025-0938
* Tue Dec 10 2024 GuoCe <guoce@kylinos.cn> - 3.11.6-10
- Type:update
- CVE:NA
- SUG:NA
- DESC:support TLS_SM4
* Mon Nov 25 2024 xinsheng <xinsheng3@huawei.com> - 3.11.6-9
- Type:CVE
- CVE:CVE-2023-27043
- SUG:NA
- DESC:fix CVE-2023-27043
* Tue Nov 12 2024 Funda Wang <fundawang@yeah.net> - 3.11.6-8
- Type:CVE
- CVE:CVE-2024-9287
- SUG:NA
- DESC:fix CVE-2024-9287
- Quote template strings in venv activation scripts
* Tue Sep 24 2024 xinsheng <xinsheng3@huawei.com> - 3.11.6-7
- Type:CVE
- CVE:CVE-2024-6232,CVE-2024-3219,CVE-2024-0450,CVE-2023-6597
- SUG:NA
- DESC:fix CVE-2024-6232,CVE-2024-3219,CVE-2024-0450,CVE-2023-6597
- rename all CVE patch name
- CVE-2024-6232: Remove backtracking when parsing tarfile headers
- CVE-2024-3219: patch1 Authenticate socket connection for `socket.socketpair()` fallback
- CVE-2024-3219: patch2 Rework pure Python socketpair tests to avoid use of importlib.reload.
- CVE-2024-0450: Protect zipfile from "quoted-overlap" zipbomb
- CVE-2023-6597: tempfile.TemporaryDirectory: fix symlink bug in cleanup
* Tue Sep 03 2024 xinsheng <xinsheng3@huawei.com> - 3.11.6-6
- Type:CVE
- CVE:NA