Skip to content

Commit ae7724a

Browse files
committed
gh-150751: validate http.client Content-Length and chunk-size
RFC 9112 defines Content-Length as 1*DIGIT and chunk-size as 1*HEXDIG, but int() also accepts a sign, underscores, surrounding whitespace and an 0x prefix, so malformed framing values were parsed instead of rejected.
1 parent 27ebd9a commit ae7724a

3 files changed

Lines changed: 43 additions & 8 deletions

File tree

Lib/http/client.py

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,13 @@
158158
# to prevent http header injection.
159159
_contains_disallowed_method_pchar_re = re.compile('[\x00-\x1f]')
160160

161+
# RFC 9112: Content-Length = 1*DIGIT and chunk-size = 1*HEXDIG. int() is more
162+
# permissive (it accepts a leading sign, underscores, surrounding whitespace
163+
# and, in base 16, an "0x" prefix and non-ASCII digits), so the body-framing
164+
# values are matched against the grammar before being passed to int().
165+
_is_legal_content_length = re.compile(r'[0-9]+').fullmatch
166+
_is_legal_chunk_size = re.compile(rb'[0-9a-fA-F]+').fullmatch
167+
161168
# We always set the Content-Length header for these methods because some
162169
# servers will otherwise respond with a 411
163170
_METHODS_EXPECTING_BODY = {'PATCH', 'POST', 'PUT'}
@@ -376,14 +383,8 @@ def begin(self, *, _max_headers=None):
376383
# NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
377384
self.length = None
378385
length = self.headers.get("content-length")
379-
if length and not self.chunked:
380-
try:
381-
self.length = int(length)
382-
except ValueError:
383-
self.length = None
384-
else:
385-
if self.length < 0: # ignore nonsensical negative lengths
386-
self.length = None
386+
if length and not self.chunked and _is_legal_content_length(length):
387+
self.length = int(length)
387388
else:
388389
self.length = None
389390

@@ -550,7 +551,10 @@ def _read_next_chunk_size(self):
550551
i = line.find(b";")
551552
if i >= 0:
552553
line = line[:i] # strip chunk-extensions
554+
line = line.strip()
553555
try:
556+
if not _is_legal_chunk_size(line):
557+
raise ValueError("invalid chunk size")
554558
return int(line, 16)
555559
except ValueError:
556560
# close the connection as protocol synchronisation is

Lib/test/test_httplib.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1319,6 +1319,32 @@ def test_negative_content_length(self):
13191319
self.assertEqual(resp.read(), b'Hello\r\n')
13201320
self.assertTrue(resp.isclosed())
13211321

1322+
def test_malformed_content_length(self):
1323+
# RFC 9112: Content-Length = 1*DIGIT. Values that int() accepts but
1324+
# the grammar forbids must not be used to frame the body.
1325+
for value in ('+5', '5_0'):
1326+
with self.subTest(value=value):
1327+
sock = FakeSocket(
1328+
'HTTP/1.1 200 OK\r\nContent-Length: %s\r\n\r\nHello\r\n' % value)
1329+
resp = client.HTTPResponse(sock, method="GET")
1330+
resp.begin()
1331+
self.assertIsNone(resp.length)
1332+
self.assertEqual(resp.read(), b'Hello\r\n')
1333+
resp.close()
1334+
1335+
def test_malformed_chunk_size(self):
1336+
# RFC 9112: chunk-size = 1*HEXDIG. Reject sizes that int(_, 16) accepts
1337+
# but the grammar forbids (a sign, an "0x" prefix or underscores).
1338+
start = 'HTTP/1.1 200 OK\r\nTransfer-Encoding: chunked\r\n\r\n'
1339+
for size in ('-5', '+5', '0x5', '1_f'):
1340+
with self.subTest(size=size):
1341+
sock = FakeSocket(start + '%s\r\nHELLO\r\n0\r\n\r\n' % size)
1342+
resp = client.HTTPResponse(sock, method="GET")
1343+
resp.begin()
1344+
self.assertRaises(client.IncompleteRead, resp.read)
1345+
self.assertTrue(resp.isclosed())
1346+
resp.close()
1347+
13221348
def test_incomplete_read(self):
13231349
sock = FakeSocket('HTTP/1.1 200 OK\r\nContent-Length: 10\r\n\r\nHello\r\n')
13241350
resp = client.HTTPResponse(sock, method="GET")
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
:mod:`http.client` now validates the ``Content-Length`` header and the
2+
chunked ``chunk-size`` against the RFC 9112 grammar (``1*DIGIT`` and
3+
``1*HEXDIG``) before parsing them, rejecting values such as ``+5``, ``5_0``
4+
or a ``0x``-prefixed or negative chunk size that :func:`int` would otherwise
5+
accept. This avoids framing a response differently from a strict peer.

0 commit comments

Comments
 (0)