From d417944db3b304ce6f09d83f5252e53c065c08bb Mon Sep 17 00:00:00 2001 From: RalfG Date: Wed, 13 May 2026 14:58:36 +0200 Subject: [PATCH 1/2] Fix TSV reader losing empty string values in provenance/metadata/rescoring dicts Empty TSV cells were converted to None at parse time, then stored directly into sub-dicts after the str(v) coercion was removed in 4044a55. Pydantic rejects None where dict[str, str] is expected. Fix: restore empty string for provenance and metadata values; coerce to NaN for rescoring features. --- psm_utils/io/tsv.py | 9 +++++---- tests/test_io/test_tsv.py | 35 ++++++++++++++++++++++++++++++++++- 2 files changed, 39 insertions(+), 5 deletions(-) diff --git a/psm_utils/io/tsv.py b/psm_utils/io/tsv.py index 9fd66a3..6167718 100644 --- a/psm_utils/io/tsv.py +++ b/psm_utils/io/tsv.py @@ -51,6 +51,7 @@ import ast import csv import logging +import math from collections.abc import Iterator from pathlib import Path from typing import Any, TextIO @@ -110,14 +111,14 @@ def _parse_entry(entry: dict[str, str | None]) -> dict[str, Any]: parsed_entry: dict[str, Any] = {} provenance_data: dict[str, str | None] = {} metadata: dict[str, str | None] = {} - rescoring_features: dict[str, str | None] = {} + rescoring_features: dict[str, Any] = {} for k, v in entry.items(): if k.startswith("provenance:"): - provenance_data[k[11:]] = v + provenance_data[k[11:]] = v if v is not None else "" elif k.startswith("meta:"): - metadata[k[5:]] = v + metadata[k[5:]] = v if v is not None else "" elif k.startswith("rescoring:"): - rescoring_features[k[10:]] = v + rescoring_features[k[10:]] = v if v is not None else math.nan else: parsed_entry[k] = v diff --git a/tests/test_io/test_tsv.py b/tests/test_io/test_tsv.py index 8e77395..a06427d 100644 --- a/tests/test_io/test_tsv.py +++ b/tests/test_io/test_tsv.py @@ -1,5 +1,7 @@ """Tests for psm_utils.io.tsv.""" +import math + import pytest from psm_utils.io.exceptions import PSMUtilsIOException @@ -27,13 +29,44 @@ 
"rescoring_features": {}, }, ), + ( + # Empty string provenance value (e.g. missing optional provenance field) + {"peptidoform": "ACDE", "spectrum_id": "1", "provenance:missing": ""}, + { + "peptidoform": "ACDE", + "spectrum_id": "1", + "provenance_data": {"missing": ""}, + "metadata": {}, + "rescoring_features": {}, + }, + ), + ( + # Empty string rescoring feature value should become NaN + {"peptidoform": "ACDE", "spectrum_id": "1", "rescoring:score": ""}, + { + "peptidoform": "ACDE", + "spectrum_id": "1", + "provenance_data": {}, + "metadata": {}, + "rescoring_features": {"score": float("nan")}, + }, + ), ] class TestTSVReader: def test__parse_entry(self): for test_in, expected_out in test_cases: - assert TSVReader._parse_entry(test_in) == expected_out + result = TSVReader._parse_entry(test_in) + for key, expected_val in expected_out.items(): + if isinstance(expected_val, dict): + for k, v in expected_val.items(): + if isinstance(v, float) and math.isnan(v): + assert math.isnan(result[key][k]) + else: + assert result[key][k] == v + else: + assert result[key] == expected_val def test_iter(self): reader = TSVReader("tests/test_data/test.tsv") From 491378c8aaa760e409f9ac92d20a41d64656f572 Mon Sep 17 00:00:00 2001 From: RalfG Date: Wed, 13 May 2026 15:04:44 +0200 Subject: [PATCH 2/2] Update CHANGELOG and version bump for 1.5.3 --- CHANGELOG.md | 7 +++++++ psm_utils/__init__.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 592e413..6857e1d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,13 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [1.5.3] - 2026-05-13 + +### Fixed + +- `io.fragpipe`: Fix parsing of modifications (support both splitting on `,` and `, `). 
+- `io.tsv`: Fix empty string values in `provenance_data` and `metadata` being lost (converted to `None`) when reading TSV files; empty rescoring feature values are now parsed as `NaN` (fixes #145). + ## [1.5.2] - 2026-02-12 ### Fixed diff --git a/psm_utils/__init__.py b/psm_utils/__init__.py index d790bca..0a83a31 100644 --- a/psm_utils/__init__.py +++ b/psm_utils/__init__.py @@ -1,6 +1,6 @@ """Common utilities for parsing and handling PSMs, and search engine results.""" -__version__ = "1.5.2" +__version__ = "1.5.3" __all__ = ["Peptidoform", "PSM", "PSMList"] from warnings import filterwarnings