Skip to content

Commit ca60cad

Browse files
committed
perf(transaction): cache serialized records to avoid double serialization
- Cache serialized records in Transaction._buffer_serialized during put() and reuse at commit() time, eliminating redundant serialization
- Remove unused start_mtime parameter from _commit_transaction_buffer
- Add key length validation to Transaction.delete() and Table.delete() for consistency with put() methods

Extract validate_key_length() helper to _keys.py to reduce code duplication. Add tests for key length validation in delete operations.
1 parent 8279539 commit ca60cad

5 files changed

Lines changed: 70 additions & 22 deletions

File tree

src/jsonlt/_keys.py

Lines changed: 22 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -8,8 +8,13 @@
88
from collections.abc import Sequence
99
from typing import TYPE_CHECKING, TypeAlias
1010

11-
from ._constants import MAX_INTEGER_KEY, MAX_TUPLE_ELEMENTS, MIN_INTEGER_KEY
12-
from ._exceptions import InvalidKeyError
11+
from ._constants import (
12+
MAX_INTEGER_KEY,
13+
MAX_KEY_LENGTH,
14+
MAX_TUPLE_ELEMENTS,
15+
MIN_INTEGER_KEY,
16+
)
17+
from ._exceptions import InvalidKeyError, LimitError
1318
from ._json import utf8_byte_length
1419

1520
if TYPE_CHECKING:
@@ -343,3 +348,18 @@ def key_from_json(value: object) -> Key:
343348
return tuple(elements)
344349
msg = f"Cannot convert {type(value).__name__} to key"
345350
raise TypeError(msg)
351+
352+
353+
def validate_key_length(key: Key) -> None:
354+
"""Validate that key length does not exceed the maximum.
355+
356+
Args:
357+
key: The key to validate.
358+
359+
Raises:
360+
LimitError: If key length exceeds MAX_KEY_LENGTH (1024 bytes).
361+
"""
362+
key_len = key_length(key)
363+
if key_len > MAX_KEY_LENGTH:
364+
msg = f"key length {key_len} bytes exceeds maximum {MAX_KEY_LENGTH}"
365+
raise LimitError(msg)

src/jsonlt/_table.py

Lines changed: 7 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@
1010
from typing import TYPE_CHECKING, ClassVar
1111
from typing_extensions import override
1212

13-
from ._constants import MAX_KEY_LENGTH, MAX_RECORD_SIZE
13+
from ._constants import MAX_RECORD_SIZE
1414
from ._encoding import validate_no_surrogates
1515
from ._exceptions import (
1616
ConflictError,
@@ -25,10 +25,10 @@
2525
from ._keys import (
2626
Key,
2727
KeySpecifier,
28-
key_length,
2928
key_specifiers_match,
3029
normalize_key_specifier,
3130
validate_key_arity,
31+
validate_key_length,
3232
)
3333
from ._readable import ReadableMixin
3434
from ._reader import parse_table_content, read_table_file
@@ -431,10 +431,7 @@ def put(self, record: "JSONObject") -> None:
431431

432432
# Extract and validate key
433433
key = extract_key(record, key_specifier)
434-
key_len = key_length(key)
435-
if key_len > MAX_KEY_LENGTH:
436-
msg = f"key length {key_len} bytes exceeds maximum {MAX_KEY_LENGTH}"
437-
raise LimitError(msg)
434+
validate_key_length(key)
438435

439436
# Serialize record
440437
serialized = serialize_json(record)
@@ -528,6 +525,9 @@ def delete(self, key: Key) -> bool:
528525
# Validate key arity matches specifier
529526
validate_key_arity(key, key_specifier)
530527

528+
# Validate key length
529+
validate_key_length(key)
530+
531531
# Build tombstone
532532
tombstone = build_tombstone(key, key_specifier)
533533
serialized = serialize_json(tombstone)
@@ -666,13 +666,12 @@ def _end_transaction(self) -> None:
666666
"""
667667
self._active_transaction = None
668668

669-
def _commit_transaction_buffer( # noqa: PLR0913
669+
def _commit_transaction_buffer(
670670
self,
671671
lines: list[str],
672672
start_state: "dict[Key, JSONObject]",
673673
written_keys: set[Key],
674674
buffer_updates: "dict[Key, JSONObject | None]",
675-
start_mtime: float,
676675
start_size: int,
677676
*,
678677
_retries: int = 0,
@@ -687,7 +686,6 @@ def _commit_transaction_buffer( # noqa: PLR0913
687686
start_state: Snapshot of table state when transaction started.
688687
written_keys: Keys that were modified in the transaction.
689688
buffer_updates: Map of key -> record (or None for delete).
690-
start_mtime: File mtime when transaction started.
691689
start_size: File size when transaction started.
692690
_retries: Internal retry counter (do not pass externally).
693691
@@ -743,7 +741,6 @@ def _commit_transaction_buffer( # noqa: PLR0913
743741
start_state,
744742
written_keys,
745743
buffer_updates,
746-
start_mtime,
747744
start_size,
748745
_retries=_retries + 1,
749746
)

src/jsonlt/_transaction.py

Lines changed: 16 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -9,11 +9,11 @@
99
from typing import TYPE_CHECKING, ClassVar
1010
from typing_extensions import override
1111

12-
from ._constants import MAX_KEY_LENGTH, MAX_RECORD_SIZE
12+
from ._constants import MAX_RECORD_SIZE
1313
from ._encoding import validate_no_surrogates
1414
from ._exceptions import LimitError, TransactionError
1515
from ._json import serialize_json, utf8_byte_length
16-
from ._keys import Key, KeySpecifier, key_length, validate_key_arity
16+
from ._keys import Key, KeySpecifier, validate_key_arity, validate_key_length
1717
from ._readable import ReadableMixin
1818
from ._records import build_tombstone, extract_key, validate_record
1919

@@ -52,6 +52,7 @@ class Transaction(ReadableMixin):
5252
"""
5353

5454
__slots__: ClassVar[tuple[str, ...]] = (
55+
"_buffer_serialized",
5556
"_buffer_updates",
5657
"_cached_sorted_keys",
5758
"_file_mtime",
@@ -69,6 +70,7 @@ class Transaction(ReadableMixin):
6970
_snapshot: "dict[Key, JSONObject]"
7071
_start_state: "dict[Key, JSONObject]"
7172
_buffer_updates: "dict[Key, JSONObject | None]"
73+
_buffer_serialized: "dict[Key, str]"
7274
_written_keys: set[Key]
7375
_finalized: bool
7476
_file_mtime: float
@@ -99,6 +101,7 @@ def __init__(
99101
# reloaded state. Safe because _start_state values are never modified.
100102
self._start_state = state.copy()
101103
self._buffer_updates = {}
104+
self._buffer_serialized = {}
102105
self._written_keys = set()
103106
self._finalized = False
104107
# Cache file stats for skip-reload optimization at commit time
@@ -154,18 +157,18 @@ def put(self, record: "JSONObject") -> None:
154157

155158
# Extract and validate key
156159
key = extract_key(record, self._key_specifier)
157-
key_len = key_length(key)
158-
if key_len > MAX_KEY_LENGTH:
159-
msg = f"key length {key_len} bytes exceeds maximum {MAX_KEY_LENGTH}"
160-
raise LimitError(msg)
160+
validate_key_length(key)
161161

162-
# Serialize record to check size limit (we don't store the serialized form)
162+
# Serialize record to check size limit and cache for commit
163163
serialized = serialize_json(record)
164164
record_bytes = utf8_byte_length(serialized)
165165
if record_bytes > MAX_RECORD_SIZE:
166166
msg = f"record size {record_bytes} bytes exceeds maximum {MAX_RECORD_SIZE}"
167167
raise LimitError(msg)
168168

169+
# Cache serialized form before deep copy (record hasn't been modified)
170+
self._buffer_serialized[key] = serialized
171+
169172
# Buffer the update (only keep latest value per key)
170173
record_copy = copy.deepcopy(record)
171174
self._buffer_updates[key] = record_copy
@@ -196,11 +199,15 @@ def delete(self, key: Key) -> bool:
196199
# Validate key arity matches specifier
197200
validate_key_arity(key, self._key_specifier)
198201

202+
# Validate key length
203+
validate_key_length(key)
204+
199205
# Check if key exists in snapshot
200206
existed = key in self._snapshot
201207

202208
# Buffer the delete (only keep latest state per key)
203209
self._buffer_updates[key] = None
210+
_ = self._buffer_serialized.pop(key, None)
204211
self._written_keys.add(key)
205212

206213
# Update snapshot
@@ -239,8 +246,8 @@ def commit(self) -> None:
239246
tombstone = build_tombstone(key, self._key_specifier)
240247
lines.append(serialize_json(tombstone))
241248
else:
242-
# Record (put)
243-
lines.append(serialize_json(value))
249+
# Record (put) - use cached serialization from put()
250+
lines.append(self._buffer_serialized[key])
244251

245252
# Commit via table (handles locking and conflict detection)
246253
# Transaction is a friend class of Table - protected access is intentional
@@ -249,7 +256,6 @@ def commit(self) -> None:
249256
self._start_state,
250257
self._written_keys,
251258
self._buffer_updates,
252-
self._file_mtime,
253259
self._file_size,
254260
)
255261
finally:

tests/unit/test_table.py

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -591,6 +591,17 @@ def test_delete_tuple_key_arity_mismatch_raises(self, tmp_path: "Path") -> None:
591591
with pytest.raises(InvalidKeyError, match="key arity mismatch"):
592592
_ = table.delete(("acme", 1, "extra")) # 3 elements, specifier has 2
593593

594+
def test_delete_key_length_limit_raises(self, tmp_path: "Path") -> None:
595+
"""Delete with key exceeding 1024 bytes raises LimitError."""
596+
table_path = tmp_path / "test.jsonlt"
597+
table = Table(table_path, key="id")
598+
599+
# 1030 characters + quotes = 1032 bytes > 1024
600+
long_key = "x" * 1030
601+
602+
with pytest.raises(LimitError, match="key length"):
603+
_ = table.delete(long_key)
604+
594605

595606
class TestTableClear:
596607
def test_clear_removes_all_records(self, tmp_path: "Path") -> None:

tests/unit/test_transaction.py

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -276,6 +276,20 @@ def test_put_record_size_limit_raises(self, tmp_path: "Path") -> None:
276276
):
277277
tx.put({"id": "test", "data": large_data})
278278

279+
def test_delete_key_length_limit_raises(self, tmp_path: "Path") -> None:
280+
"""Delete with key exceeding 1024 bytes raises LimitError."""
281+
table_path = tmp_path / "test.jsonlt"
282+
table = Table(table_path, key="id")
283+
284+
# 1030 characters + quotes = 1032 bytes > 1024
285+
long_key = "x" * 1030
286+
287+
with (
288+
table.transaction() as tx,
289+
pytest.raises(LimitError, match="key length"),
290+
):
291+
_ = tx.delete(long_key)
292+
279293

280294
class TestTransactionCommit:
281295
def test_commit_persists_writes(self, tmp_path: "Path") -> None:

0 commit comments

Comments (0)