Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,13 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [1.5.3] - 2026-05-13

### Fixed

- `io.fragpipe`: Fix parsing of modifications (both `,` and `, ` are now accepted as separators).
- `io.tsv`: Fix empty string values in `provenance_data` and `metadata` being lost (converted to `None`) when reading TSV files. Empty rescoring feature values are now parsed as `NaN` (fixes #145).

## [1.5.2] - 2026-02-12

### Fixed
Expand Down
2 changes: 1 addition & 1 deletion psm_utils/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""Common utilities for parsing and handling PSMs, and search engine results."""

__version__ = "1.5.2"
__version__ = "1.5.4"
__all__ = ["Peptidoform", "PSM", "PSMList"]

from warnings import filterwarnings
Expand Down
9 changes: 5 additions & 4 deletions psm_utils/io/tsv.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
import ast
import csv
import logging
import math
from collections.abc import Iterator
from pathlib import Path
from typing import Any, TextIO
Expand Down Expand Up @@ -110,14 +111,14 @@ def _parse_entry(entry: dict[str, str | None]) -> dict[str, Any]:
parsed_entry: dict[str, Any] = {}
provenance_data: dict[str, str | None] = {}
metadata: dict[str, str | None] = {}
rescoring_features: dict[str, str | None] = {}
rescoring_features: dict[str, Any] = {}
for k, v in entry.items():
if k.startswith("provenance:"):
provenance_data[k[11:]] = v
provenance_data[k[11:]] = v if v is not None else ""
elif k.startswith("meta:"):
metadata[k[5:]] = v
metadata[k[5:]] = v if v is not None else ""
elif k.startswith("rescoring:"):
rescoring_features[k[10:]] = v
rescoring_features[k[10:]] = v if v is not None else math.nan
else:
parsed_entry[k] = v

Expand Down
35 changes: 34 additions & 1 deletion tests/test_io/test_tsv.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
"""Tests for psm_utils.io.tsv."""

import math

import pytest

from psm_utils.io.exceptions import PSMUtilsIOException
Expand Down Expand Up @@ -27,13 +29,44 @@
"rescoring_features": {},
},
),
(
# Empty string provenance value (e.g. missing optional provenance field)
{"peptidoform": "ACDE", "spectrum_id": "1", "provenance:missing": ""},
{
"peptidoform": "ACDE",
"spectrum_id": "1",
"provenance_data": {"missing": ""},
"metadata": {},
"rescoring_features": {},
},
),
(
# Empty string rescoring feature value should become NaN
{"peptidoform": "ACDE", "spectrum_id": "1", "rescoring:score": ""},
{
"peptidoform": "ACDE",
"spectrum_id": "1",
"provenance_data": {},
"metadata": {},
"rescoring_features": {"score": float("nan")},
},
),
]


class TestTSVReader:
def test__parse_entry(self):
for test_in, expected_out in test_cases:
assert TSVReader._parse_entry(test_in) == expected_out
result = TSVReader._parse_entry(test_in)
for key, expected_val in expected_out.items():
if isinstance(expected_val, dict):
for k, v in expected_val.items():
if isinstance(v, float) and math.isnan(v):
assert math.isnan(result[key][k])
else:
assert result[key][k] == v
else:
assert result[key] == expected_val

def test_iter(self):
reader = TSVReader("tests/test_data/test.tsv")
Expand Down
Loading