diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst
index 2867015042ee16..923cd4a2c4c1f1 100644
--- a/Doc/library/pathlib.rst
+++ b/Doc/library/pathlib.rst
@@ -1266,6 +1266,8 @@ Reading and writing files
>>> p.read_text()
'Text file contents'
+ Return the number of characters written.
+
An existing file of the same name is overwritten. The optional parameters
have the same meaning as in :func:`open`.
@@ -1286,6 +1288,8 @@ Reading and writing files
>>> p.read_bytes()
b'Binary file contents'
+ Return the number of bytes written.
+
An existing file of the same name is overwritten.
.. versionadded:: 3.5
diff --git a/Include/internal/pycore_pyhash.h b/Include/internal/pycore_pyhash.h
index 84cb72fa6fd1b2..3056dc44cc0f1b 100644
--- a/Include/internal/pycore_pyhash.h
+++ b/Include/internal/pycore_pyhash.h
@@ -27,14 +27,14 @@ _Py_HashPointerRaw(const void *ptr)
* pppppppp ssssssss ........ fnv -- two Py_hash_t
* k0k0k0k0 k1k1k1k1 ........ siphash -- two uint64_t
* ........ ........ ssssssss djbx33a -- 16 bytes padding + one Py_hash_t
- * ........ ........ eeeeeeee pyexpat XML hash salt
+ * eeeeeeee eeeeeeee eeeeeeee pyexpat XML hash salt
*
* memory layout on 32 bit systems
* cccccccc cccccccc cccccccc uc
* ppppssss ........ ........ fnv -- two Py_hash_t
* k0k0k0k0 k1k1k1k1 ........ siphash -- two uint64_t (*)
* ........ ........ ssss.... djbx33a -- 16 bytes padding + one Py_hash_t
- * ........ ........ eeee.... pyexpat XML hash salt
+ * eeeeeeee eeeeeeee eeee.... pyexpat XML hash salt
*
* (*) The siphash member may not be available on 32 bit platforms without
* an unsigned int64 data type.
@@ -58,7 +58,9 @@ typedef union {
Py_hash_t suffix;
} djbx33a;
struct {
- unsigned char padding[16];
+ /* 16 bytes for XML_SetHashSalt16Bytes */
+ uint8_t hashsalt16[16];
+ /* 4/8 bytes for legacy XML_SetHashSalt */
Py_hash_t hashsalt;
} expat;
} _Py_HashSecret_t;
diff --git a/Include/pyexpat.h b/Include/pyexpat.h
index f523f8bb273983..a676e16a7a457e 100644
--- a/Include/pyexpat.h
+++ b/Include/pyexpat.h
@@ -62,6 +62,9 @@ struct PyExpat_CAPI
XML_Parser parser, unsigned long long activationThresholdBytes);
XML_Bool (*SetBillionLaughsAttackProtectionMaximumAmplification)(
XML_Parser parser, float maxAmplificationFactor);
+ /* might be NULL for expat < 2.8.0 */
+ XML_Bool (*SetHashSalt16Bytes)(
+ XML_Parser parser, const uint8_t entropy[16]);
/* always add new stuff to the end! */
};
diff --git a/Lib/pathlib/__init__.py b/Lib/pathlib/__init__.py
index a32e4b5320ff6d..295f633824a6ef 100644
--- a/Lib/pathlib/__init__.py
+++ b/Lib/pathlib/__init__.py
@@ -989,6 +989,7 @@ def read_text(self, encoding=None, errors=None, newline=None):
def write_bytes(self, data):
"""
Open the file in bytes mode, write to it, and close the file.
+ Return the number of bytes written.
"""
# type-check for the buffer interface before truncating the file
view = memoryview(data)
@@ -998,6 +999,7 @@ def write_bytes(self, data):
def write_text(self, data, encoding=None, errors=None, newline=None):
"""
Open the file in text mode, write to it, and close the file.
+ Return the number of characters written.
"""
# Call io.text_encoding() here to ensure any warning is raised at an
# appropriate stack level.
diff --git a/Lib/pathlib/types.py b/Lib/pathlib/types.py
index f21ce0774548f8..bb4a521223da04 100644
--- a/Lib/pathlib/types.py
+++ b/Lib/pathlib/types.py
@@ -431,6 +431,7 @@ def __open_writer__(self, mode):
def write_bytes(self, data):
"""
Open the file in bytes mode, write to it, and close the file.
+ Return the number of bytes written.
"""
# type-check for the buffer interface before truncating the file
view = memoryview(data)
@@ -440,6 +441,7 @@ def write_bytes(self, data):
def write_text(self, data, encoding=None, errors=None, newline=None):
"""
Open the file in text mode, write to it, and close the file.
+ Return the number of characters written.
"""
# Call io.text_encoding() here to ensure any warning is raised at an
# appropriate stack level.
diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py
index aaa91aca36e3c4..9a1620029c6da9 100644
--- a/Lib/test/test_pyexpat.py
+++ b/Lib/test/test_pyexpat.py
@@ -712,6 +712,20 @@ def test_change_size_2(self):
parser.Parse(xml2, True)
self.assertEqual(self.n, 4)
+    @support.requires_resource('cpu')
+    @support.requires_resource('walltime')
+    @support.bigmemtest(size=2**31, memuse=4, dry_run=False)
+    def test_large_character_data_does_not_crash(self, size):  # 'size' is passed in by bigmemtest
+        # Regression test; see https://github.com/python/cpython/issues/148441
+        parser = expat.ParserCreate()
+        parser.buffer_text = True
+        parser.buffer_size = 2**31 - 1  # INT_MAX
+        N = 2049 * (1 << 20) - 3  # Character data greater than INT_MAX
+        self.assertGreater(N, parser.buffer_size)
+        parser.CharacterDataHandler = lambda text: None
+        xml_data = b"<a>" + b"A" * N + b"</a>"  # root element keeps the document well-formed
+        self.assertEqual(parser.Parse(xml_data, True), 1)
+
class ElementDeclHandlerTest(unittest.TestCase):
def test_trigger_leak(self):
# Unfixed, this test would leak the memory of the so-called
diff --git a/Misc/NEWS.d/next/Library/2026-04-23-12-50-15.gh-issue-148441.zvpCkR.rst b/Misc/NEWS.d/next/Library/2026-04-23-12-50-15.gh-issue-148441.zvpCkR.rst
new file mode 100644
index 00000000000000..762815270e4d40
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2026-04-23-12-50-15.gh-issue-148441.zvpCkR.rst
@@ -0,0 +1,4 @@
+:mod:`xml.parsers.expat`: prevent a crash in
+:meth:`~xml.parsers.expat.xmlparser.CharacterDataHandler`
+when the character data size exceeds the parser's
+:attr:`buffer size <xml.parsers.expat.xmlparser.buffer_size>`.
diff --git a/Misc/NEWS.d/next/Security/2026-04-26-19-30-45.gh-issue-149018.a9SqWb.rst b/Misc/NEWS.d/next/Security/2026-04-26-19-30-45.gh-issue-149018.a9SqWb.rst
new file mode 100644
index 00000000000000..d1b5b368684e6a
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2026-04-26-19-30-45.gh-issue-149018.a9SqWb.rst
@@ -0,0 +1,3 @@
+Improved protection against XML hash-flooding attacks in
+:mod:`xml.parsers.expat` and :mod:`xml.etree.ElementTree` when Python is
+compiled with libExpat 2.8.0 or later.
diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c
index cbd1e026df2722..9e794be5c109ba 100644
--- a/Modules/_elementtree.c
+++ b/Modules/_elementtree.c
@@ -3735,8 +3735,12 @@ _elementtree_XMLParser___init___impl(XMLParserObject *self, PyObject *target,
PyErr_NoMemory();
return -1;
}
- /* expat < 2.1.0 has no XML_SetHashSalt() */
- if (EXPAT(st, SetHashSalt) != NULL) {
+ // Prefer 16-byte entropy, only expat >= 2.8.0. See gh-149018
+ if (EXPAT(st, SetHashSalt16Bytes) != NULL) {
+ EXPAT(st, SetHashSalt16Bytes)(self->parser,
+ _Py_HashSecret.expat.hashsalt16);
+ }
+ else if (EXPAT(st, SetHashSalt) != NULL) {
EXPAT(st, SetHashSalt)(self->parser,
(unsigned long)_Py_HashSecret.expat.hashsalt);
}
diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c
index 0f0afe17513ef1..64314e5dff93a1 100644
--- a/Modules/pyexpat.c
+++ b/Modules/pyexpat.c
@@ -393,7 +393,7 @@ my_CharacterDataHandler(void *userData, const XML_Char *data, int len)
if (self->buffer == NULL)
call_character_handler(self, data, len);
else {
- if ((self->buffer_used + len) > self->buffer_size) {
+ if (len > (self->buffer_size - self->buffer_used)) {
if (flush_character_buffer(self) < 0)
return;
/* handler might have changed; drop the rest on the floor
@@ -1533,7 +1533,10 @@ newxmlparseobject(pyexpat_state *state, const char *encoding,
Py_DECREF(self);
return NULL;
}
-#if XML_COMBINED_VERSION >= 20100
+#if XML_COMBINED_VERSION >= 20800
+ /* This feature was added upstream in libexpat 2.8.0. */
+ XML_SetHashSalt16Bytes(self->itself, _Py_HashSecret.expat.hashsalt16);
+#elif XML_COMBINED_VERSION >= 20100
/* This feature was added upstream in libexpat 2.1.0. */
XML_SetHashSalt(self->itself,
(unsigned long)_Py_HashSecret.expat.hashsalt);
@@ -2427,6 +2430,11 @@ pyexpat_exec(PyObject *mod)
#else
capi->SetHashSalt = NULL;
#endif
+#if XML_COMBINED_VERSION >= 20800
+ capi->SetHashSalt16Bytes = XML_SetHashSalt16Bytes;
+#else
+ capi->SetHashSalt16Bytes = NULL;
+#endif
#if XML_COMBINED_VERSION >= 20600
capi->SetReparseDeferralEnabled = XML_SetReparseDeferralEnabled;
#else