Commit 5a47978

feat: add 3.14, drop 3.9
1 parent 9735aeb

3 files changed: 7 additions & 3 deletions

.github/workflows/ci.yml (1 addition, 1 deletion)

```diff
@@ -12,7 +12,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
+        python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
 
     steps:
       - name: Check-out repository
```

CHANGELOG.md (4 additions, 0 deletions)

```diff
@@ -7,6 +7,7 @@ All notable changes to semchunk will be documented here. This project adheres to
 - Made it possible to chunk [Isaacus Legal Graph Schema (ILGS) Documents](https://docs.isaacus.com/ilgs/introduction) instead of just strings.
 - Added a new `tokenizer_kwargs` argument to `chunkerify()` allowing users to specify custom keyword arguments to their tokenizers and token counters. `tokenizer_kwargs` can be used to override the default behavior of treating any encountered special tokens as if they are normal text when using a `tiktoken` or `transformers` tokenizer.
 - Where a `tiktoken` or `transformers` tokenizer is used, started treating special tokens as normal text instead of, in the case of `tiktoken`, raising an error and, in the case of `transformers`, treating them as special tokens.
+- Added support for Python 3.14.
 
 ### Changed
 - Demoted asterisks in the hierarchy of splitters from sentence terminators to clause separators to better reflect their typical syntactic function.
@@ -16,6 +17,9 @@ All notable changes to semchunk will be documented here. This project adheres to
 - Significantly improved performance in cases where `merge_splits()` was the biggest bottleneck by switching from joining splits with splitters to indexing into the original text.
 - Slightly sped up `merge_splits()` by switching to the standard library's `bisect_left()` function, which is now faster than the previous implementation.
 
+### Removed
+- Dropped support for Python 3.9.
+
 ## [3.2.5] - 2025-10-28
 ### Changed
 - Switched to more accurate monthly download counts from [pypistats.org](https://pypistats.org/) rather than the less accurate counts from [pepy.tech](https://pepy.tech/).
```
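The `bisect_left()` entry in the changelog refers to Python's standard `bisect` module, which binary-searches a sorted sequence. As a hedged sketch of the general technique (not semchunk's actual `merge_splits()` code, and using made-up data), binary-searching cumulative token counts to find how many splits fit in a token budget might look like:

```python
from bisect import bisect_left

# Hypothetical illustration only: given cumulative token counts for
# successive splits, find how many whole splits fit within a token
# budget using binary search instead of a linear scan.
cumulative_tokens = [3, 7, 12, 20, 26]  # made-up data, sorted ascending
chunk_size = 13

# bisect_left returns the index of the first cumulative count that is
# >= chunk_size + 1, i.e. the number of splits that fit in the budget.
fits = bisect_left(cumulative_tokens, chunk_size + 1)
print(fits)  # -> 3 (the splits totalling 3, 7, and 12 tokens fit)
```

Because the input is sorted, this runs in O(log n) per lookup, which is why replacing a hand-rolled search with `bisect_left()` can speed up a merge loop.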

pyproject.toml (2 additions, 2 deletions)

```diff
@@ -11,7 +11,7 @@ authors = [
 ]
 description = "A Python library for splitting text into smaller chunks while preserving as much local semantic context as possible."
 readme = "README.md"
-requires-python = ">=3.9"
+requires-python = ">=3.10"
 license = {text="MIT"}
 keywords = [
     "chunking",
@@ -33,11 +33,11 @@ classifiers = [
     "Intended Audience :: Science/Research",
     "License :: OSI Approved :: MIT License",
     "Operating System :: OS Independent",
-    "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
     "Programming Language :: Python :: 3.12",
     "Programming Language :: Python :: 3.13",
+    "Programming Language :: Python :: 3.14",
     "Programming Language :: Python :: Implementation :: CPython",
     "Topic :: Scientific/Engineering :: Artificial Intelligence",
     "Topic :: Software Development :: Libraries :: Python Modules",
```
