Skip to content

Commit 50967e0

Browse files
gh-145264: Do not ignore excess Base64 data after the first padded quad
Base64 decoder (see binascii.a2b_base64(), base64.b64decode(), etc.) no longer ignores excess data after the first padded quad in non-strict (default) mode. Instead, in conformance with RFC 4648, it ignores the pad character, "=", if it is present before the end of the encoded data.
1 parent 812ef66 commit 50967e0

File tree

3 files changed

+26
-37
lines changed

3 files changed

+26
-37
lines changed

Lib/test/test_binascii.py

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -240,23 +240,21 @@ def assertNonBase64Data(data, expected, ignorechars):
240240

241241
def test_base64_excess_data(self):
242242
# Test excess data exceptions
243-
def assertExcessData(data, non_strict_expected,
244-
ignore_padchar_expected=None):
243+
def assertExcessData(data, non_strict_expected):
245244
assert_regex = r'(?i)Excess data'
246245
data = self.type2test(data)
247246
with self.assertRaisesRegex(binascii.Error, assert_regex):
248247
binascii.a2b_base64(data, strict_mode=True)
249248
self.assertEqual(binascii.a2b_base64(data, strict_mode=False),
250249
non_strict_expected)
251-
if ignore_padchar_expected is not None:
252-
self.assertEqual(binascii.a2b_base64(data, strict_mode=True,
253-
ignorechars=b'='),
254-
ignore_padchar_expected)
250+
self.assertEqual(binascii.a2b_base64(data, strict_mode=True,
251+
ignorechars=b'='),
252+
non_strict_expected)
255253
self.assertEqual(binascii.a2b_base64(data), non_strict_expected)
256254

257-
assertExcessData(b'ab==c', b'i')
258-
assertExcessData(b'ab==cd', b'i', b'i\xb7\x1d')
259-
assertExcessData(b'abc=d', b'i\xb7', b'i\xb7\x1d')
255+
assertExcessData(b'ab==c=', b'i\xb7')
256+
assertExcessData(b'ab==cd', b'i\xb7\x1d')
257+
assertExcessData(b'abc=d', b'i\xb7\x1d')
260258

261259
def test_base64errors(self):
262260
# Test base64 with invalid padding
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Base64 decoder (see :func:`binascii.a2b_base64`, :func:`base64.b64decode`, etc.) no
2+
longer ignores excess data after the first padded quad in non-strict
3+
(default) mode. Instead, in conformance with :rfc:`4648`, it ignores
4+
the pad character, "=", if it is present before the end of the encoded data.

Modules/binascii.c

Lines changed: 15 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -640,36 +640,24 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode,
640640
*/
641641
if (this_ch == BASE64_PAD) {
642642
pads++;
643-
644-
if (strict_mode) {
645-
if (quad_pos >= 2 && quad_pos + pads <= 4) {
646-
continue;
647-
}
648-
if (ignorechar(BASE64_PAD, ignorechars, ignorecache)) {
649-
continue;
650-
}
651-
if (quad_pos == 1) {
652-
/* Set an error below. */
653-
break;
654-
}
655-
state = get_binascii_state(module);
656-
if (state) {
657-
PyErr_SetString(state->Error,
658-
(quad_pos == 0 && ascii_data == data->buf)
659-
? "Leading padding not allowed"
660-
: "Excess padding not allowed");
661-
}
662-
goto error_end;
643+
if (quad_pos >= 2 && quad_pos + pads <= 4) {
644+
continue;
663645
}
664-
else {
665-
if (quad_pos >= 2 && quad_pos + pads >= 4) {
666-
/* A pad sequence means we should not parse more input.
667-
** We've already interpreted the data from the quad at this point.
668-
*/
669-
goto done;
670-
}
646+
if (!strict_mode || ignorechar(BASE64_PAD, ignorechars, ignorecache)) {
671647
continue;
672648
}
649+
if (quad_pos == 1) {
650+
/* Set an error below. */
651+
break;
652+
}
653+
state = get_binascii_state(module);
654+
if (state) {
655+
PyErr_SetString(state->Error,
656+
(quad_pos == 0 && ascii_data == data->buf)
657+
? "Leading padding not allowed"
658+
: "Excess padding not allowed");
659+
}
660+
goto error_end;
673661
}
674662

675663
unsigned char v = table_a2b_base64[this_ch];
@@ -748,7 +736,6 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode,
748736
goto error_end;
749737
}
750738

751-
done:
752739
return PyBytesWriter_FinishWithPointer(writer, bin_data);
753740

754741
error_end:

0 commit comments

Comments
 (0)