255 changes: 176 additions & 79 deletions neo/rawio/spikegadgetsrawio.py
@@ -8,14 +8,87 @@
https://bitbucket.org/mkarlsso/trodes/wiki/Configuration

Note :
* this file format has multiple versions. Newer versions include the gain for scaling.
The actual implementation does not contain this feature because we don't have
files to test it, so for now the gain is "hardcoded" to 1 and units are
not handled correctly.

The ".rec" file format contains:
* a first text part with information in an XML structure
* a second part for the binary buffer
* this file format has multiple versions. When the SpikeConfiguration entries in the
XML header carry a `spikeScalingToUv` attribute (newer versions), the reader uses
it to populate per-channel `gain` and sets `units="uV"`. When the attribute is
absent (older files), the reader falls back to `gain=1` with empty `units` and
emits a warning that the data has no physical units.

The ".rec" file format contains two parts:

+--------------------------------------------+
| XML configuration |
| GlobalConfiguration |
| HardwareConfiguration |
| SpikeConfiguration |
| ... |
| terminated by "</Configuration>" |
+--------------------------------------------+
| |
| Binary section |
| A stream of fixed-size packets, one |
| per sample tick. packet_size is |
| constant for a given file, computed |
| at parse time from the XML. |
| |
| packet 0 |
| packet 1 |
| packet 2 |
| ... |
+--------------------------------------------+
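The two-part layout above can be split with a short sketch. This assumes the header is ASCII XML and that everything after the closing tag (including any trailing newline) belongs to the binary section; `split_rec` is an illustrative name, not part of the reader:

```python
def split_rec(data: bytes) -> tuple[str, bytes]:
    """Split raw .rec bytes into (xml_header_text, binary_section)."""
    marker = b"</Configuration>"
    end = data.find(marker)
    if end == -1:
        # Mirrors the reader's refusal to parse files without the tag.
        raise ValueError("missing '</Configuration>' in .rec header")
    header_end = end + len(marker)
    return data[:header_end].decode("utf8"), data[header_end:]
```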

Each packet has the following structure (packet_size bytes total):

+------+-----------+-----------+ ... +-----------+--------+----- ... -----+
| 0x55 | device A | device B | | device K | tstamp | ephys region |
| 1 B | numBytes | numBytes | | numBytes | 4 B | 2*N_ephy B |
+------+-----------+-----------+ ... +-----------+--------+----- ... -----+
^ ^ ^ ^
| | | |
packet one block per hardware device uint32 one int16 per
marker with a numBytes attribute in sample ephys channel;
HardwareConfiguration (e.g. tick layout of this
Controller_DIO, Multiplexed, region depends
SysClock, ECU) on the device,
see below.
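Summing the fields in the diagram gives the packet size the reader computes at parse time (sketch under the layout described above; `compute_packet_size` is an illustrative name):

```python
def compute_packet_size(device_num_bytes: list[int], num_ephy_channels: int) -> int:
    """Packet size in bytes: a 1-byte 0x55 marker, one block per hardware
    device (numBytes each), a 4-byte uint32 sample tick, then 2 bytes
    (one int16) per ephys channel."""
    return 1 + sum(device_num_bytes) + 4 + 2 * num_ephy_channels
```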

The ephys region is laid out differently depending on the acquisition hardware,
which is declared by SpikeConfiguration.device:

* Intan recordings (`device="intan"` or absent on legacy files):
chip-interleaved order. The SpikeGadgets MCU (main control unit) clocks all
attached Intan chips in parallel over SPI (serial peripheral interface), so
samples are written in the sequence
[chip 0 ch 0, chip 1 ch 0, ..., chip N-1 ch 0,
chip 0 ch 1, chip 1 ch 1, ..., chip N-1 ch 1, ...].

Example with chanPerChip=32 and 4 chips, every packet's ephys region:

slot 0 1 2 3 | 4 5 6 7 | ... | 124 125 126 127
hwChan 0 32 64 96 | 1 33 65 97 | ... | 31 63 95 127

Formula: hwChan at slot i = (i % n_chips) * chanPerChip + (i // n_chips).
The XML's <SpikeChannel> hwChan attributes are *not* in slot order; they are
listed in user-defined SpikeNTrode (tetrode) groupings, so the reader cannot
use XML document position as a proxy and must reproduce the chip-interleaved
sequence directly (see `_intan_hwchans_in_binary_order`).

* Neuropixels recordings (`device="neuropixels1"` or `"neuropixels2"`):
hwChan ascending order. The SpikeGadgets MCU (main control unit) firmware
emits Neuropixels samples in hwChan ascending order: byte pair i of each
packet holds the sample from the electrode whose hwChan = i. The XML's
<SpikeChannel> elements may be listed in any order Trodes wrote them
(typically not hwChan-ascending), but that order does not constrain the
binary; the binary always follows hwChan ordering.

Example, every packet's ephys region:

slot 0 1 2 3 4 5 ... 380 381 382 383
hwChan 0 1 2 3 4 5 ... 380 381 382 383

The reader walks the XML to recover per-trode metadata (gain, anatomical
coordinates) for each hwChan, but the column-to-byte-position mapping is
the identity: column i reads byte pair i, with hwChan i's data.
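The chip-interleave formula for Intan recordings can be checked directly (sketch; `intan_slot_to_hwchan` is an illustrative name, not part of the reader):

```python
def intan_slot_to_hwchan(slot: int, n_chips: int, chan_per_chip: int) -> int:
    """hwChan stored at byte pair `slot` of the ephys region:
    (slot % n_chips) * chanPerChip + (slot // n_chips)."""
    return (slot % n_chips) * chan_per_chip + (slot // n_chips)
```

With chanPerChip=32 and 4 chips this reproduces the Intan table above (slots 0-7 map to hwChans 0, 32, 64, 96, 1, 33, 65, 97); for Neuropixels the mapping is the identity and no helper is needed.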

Author: Samuel Garcia
"""
@@ -81,31 +154,49 @@ def __init__(self, filename="", selected_streams=None):
def _source_name(self):
return self.filename

def _produce_ephys_channel_ids(self, n_total_channels, n_channels_per_chip, missing_hw_chans):
"""Compute the channel ID labels for subset of spikegadgets recordings
The ephys channels in the .rec file are stored in the following order:
hwChan ID of channel 0 of first chip, hwChan ID of channel 0 of second chip, ..., hwChan ID of channel 0 of Nth chip,
hwChan ID of channel 1 of first chip, hwChan ID of channel 1 of second chip, ..., hwChan ID of channel 1 of Nth chip,
...
So if there are 32 channels per chip and 128 channels (4 chips), then the channel IDs are:
0, 32, 64, 96, 1, 33, 65, 97, ..., 31, 63, 95, 127
See also: https://github.com/NeuralEnsemble/python-neo/issues/1215

This doesn't work for all types of spikegadgets
see: https://github.com/NeuralEnsemble/python-neo/issues/1517

If there are any missing hardware channels, they must be specified in missing_hw_chans.
See: https://github.com/NeuralEnsemble/python-neo/issues/1592
def _intan_hwchans_in_binary_order(self, sconf, num_ephy_channels, num_ephy_channels_xml):
"""Return the hwChan-at-each-byte-position sequence for an Intan recording.

The SpikeGadgets MCU (main control unit) multiplexes Intan chips in chip-interleaved
order: local-channel outer, chip inner. Every chip contributes its local channel 0
first, then every chip contributes its local channel 1, and so on. So byte pair i of
each packet holds the sample for the hwChan returned at index i.

Example: chanPerChip=32 and 4 chips (128 channels total). Returns
[0, 32, 64, 96, 1, 33, 65, 97, ..., 31, 63, 95, 127].

If the channel count does not divide evenly into chanPerChip we fall back to XML
document order; in that case the chip-interleaved bridging assumption does not
apply and labels reflect the XML's hwChan attributes directly.

See also:
- https://github.com/NeuralEnsemble/python-neo/issues/1215 (origin of this logic)
- https://github.com/NeuralEnsemble/python-neo/issues/1517 (doesn't fit all setups)
- https://github.com/NeuralEnsemble/python-neo/issues/1592 (missing channels)
"""
ephys_channel_ids_list = []
for local_hw_channel in range(n_channels_per_chip):
n_chips = int(n_total_channels / n_channels_per_chip)
intan_chans_per_chip = int(sconf.attrib.get("chanPerChip", 32)) # RHD2132 default for legacy files
hw_chans_in_xml = [int(schan.attrib["hwChan"]) for trode in sconf for schan in trode]

channels_fit_chip_layout = (
intan_chans_per_chip > 0
and num_ephy_channels % intan_chans_per_chip == 0
)
if not channels_fit_chip_layout:
return hw_chans_in_xml

# Reproduce the chip-interleaved hwChan sequence (local-channel outer, chip inner)
# so that hwchans_in_binary_order[i] is the hwChan whose data lives at byte pair i.
# Any hwChans absent from the SpikeConfiguration are skipped.
missing_hw_chans = set(range(num_ephy_channels)) - set(hw_chans_in_xml)
n_chips = num_ephy_channels_xml // intan_chans_per_chip
hwchans_in_binary_order = []
for local_hw_channel in range(intan_chans_per_chip):
for chip in range(n_chips):
global_hw_chan = local_hw_channel + chip * n_channels_per_chip
if global_hw_chan in missing_hw_chans:
hw_chan = local_hw_channel + chip * intan_chans_per_chip
if hw_chan in missing_hw_chans:
continue
ephys_channel_ids_list.append(local_hw_channel + chip * n_channels_per_chip)
return ephys_channel_ids_list
hwchans_in_binary_order.append(hw_chan)
return hwchans_in_binary_order

def _parse_header(self):
# parse file until "</Configuration>"
@@ -122,7 +213,7 @@ def _parse_header(self):

# explore xml header
root = ElementTree.fromstring(header_txt)
gconf = sr = root.find("GlobalConfiguration")
gconf = root.find("GlobalConfiguration")
hconf = root.find("HardwareConfiguration")
sconf = root.find("SpikeConfiguration")

Expand All @@ -140,12 +231,6 @@ def _parse_header(self):
"than the number of channels in the hardware configuration"
)

# as spikegadgets change we should follow this
try:
num_chan_per_chip = int(sconf.attrib["chanPerChip"])
except KeyError:
num_chan_per_chip = 32 # default value for Intan chips

# explore sub stream and count packet size
# first bytes is 0x55
packet_size = 1
@@ -214,61 +299,73 @@

if num_ephy_channels > 0:
stream_id = "trodes"
stream_name = stream_id
buffer_id = ""
signal_streams.append((stream_name, stream_id, buffer_id))
signal_streams.append((stream_id, stream_id, ""))
self._mask_channels_bytes[stream_id] = []

# we can only produce these channels for a subset of spikegadgets setups. If this criterion isn't
# met then we should just use the raw_channel_ids and let the end user sort everything out
if num_ephy_channels % num_chan_per_chip == 0:
all_hw_chans = [int(schan.attrib["hwChan"]) for trode in sconf for schan in trode]
missing_hw_chans = set(range(num_ephy_channels)) - set(all_hw_chans)
channel_ids = self._produce_ephys_channel_ids(
num_ephy_channels_xml, num_chan_per_chip, missing_hw_chans
)
raw_channel_ids = False
else:
raw_channel_ids = True

chan_ind = 0
self.is_scaleable = all("spikeScalingToUv" in trode.attrib for trode in sconf)
if not self.is_scaleable:
self.logger.warning(
"Unable to read channel gain scaling (to uV) from .rec header. Data has no physical units!"
)

for trode in sconf:
if "spikeScalingToUv" in trode.attrib:
gain = float(trode.attrib["spikeScalingToUv"])
# Compute the hwChan-at-each-byte-position sequence based on the hardware kind
# declared in SpikeConfiguration.device.
# - Intan recordings are chip-interleaved by the MCU (main control unit), so we
# reproduce that ordering from the chip layout.
# - Neuropixels recordings are emitted in hwChan ascending order: byte pair i
# holds the sample from the electrode whose hwChan = i.
spike_device = sconf.attrib.get("device")
if spike_device in (None, "intan"):
hwchans_in_binary_order = self._intan_hwchans_in_binary_order(
sconf, num_ephy_channels, num_ephy_channels_xml
)
elif spike_device in ("neuropixels1", "neuropixels2"):
xml_hwchans = {int(schan.attrib["hwChan"]) for trode in sconf for schan in trode}
expected = set(range(num_ephy_channels))
if xml_hwchans != expected:
raise NeoReadWriteError(
"SpikeGadgets Neuropixels: hwChan values in SpikeConfiguration do not "
f"cover [0, {num_ephy_channels}). The firmware emits Neuropixels samples "
"in hwChan ascending order over a contiguous range; this file has "
f"missing or out-of-range hwChans (missing: {sorted(expected - xml_hwchans)[:5]}..., "
f"extra: {sorted(xml_hwchans - expected)[:5]}...)."
)
hwchans_in_binary_order = list(range(num_ephy_channels))
else:
raise NeoReadWriteError(
f"SpikeGadgets: unsupported SpikeConfiguration.device {spike_device!r}. "
"Add a dedicated branch for this hardware in SpikeGadgetsRawIO._parse_header."
)

# Walk binary order, using hwChan as the explicit bridge to per-trode metadata.
trode_by_hwchan = {int(schan.attrib["hwChan"]): trode for trode in sconf for schan in trode}
schan_by_hwchan = {int(schan.attrib["hwChan"]): schan for trode in sconf for schan in trode}
for binary_index, hw_chan in enumerate(hwchans_in_binary_order):
parent_trode = trode_by_hwchan[hw_chan]
schan = schan_by_hwchan[hw_chan]
if "spikeScalingToUv" in parent_trode.attrib:
gain = float(parent_trode.attrib["spikeScalingToUv"])
units = "uV"
else:
gain = 1 # revert to hardcoded gain
gain = 1.0
units = ""

for schan in trode:
# Here we use raw ids if necessary for parsing (for some neuropixel recordings)
# otherwise we default back to the raw hwChan IDs
if raw_channel_ids:
name = "trode" + trode.attrib["id"] + "chan" + schan.attrib["hwChan"]
chan_id = schan.attrib["hwChan"]
else:
chan_id = str(channel_ids[chan_ind])
name = "hwChan" + chan_id

offset = 0.0
buffer_id = ""
signal_channels.append(
(name, chan_id, self._sampling_rate, "int16", units, gain, offset, stream_id, buffer_id)
)

chan_mask = np.zeros(packet_size, dtype="bool")
num_bytes = packet_size - 2 * num_ephy_channels + 2 * chan_ind
chan_mask[num_bytes] = True
chan_mask[num_bytes + 1] = True
self._mask_channels_bytes[stream_id].append(chan_mask)
if spike_device in ("neuropixels1", "neuropixels2"):
chan_id = f"hwChan{hw_chan}"
sorting_group = schan.attrib.get("spikeSortingGroup", "0")
name = f"probe{sorting_group}_chan{hw_chan}"
else:
chan_id = str(hw_chan)
name = f"trode{parent_trode.attrib['id']}chan{hw_chan}"
signal_channels.append(
(name, chan_id, self._sampling_rate, "int16", units, gain, 0.0, stream_id, "")
)

chan_ind += 1
num_bytes = packet_size - 2 * num_ephy_channels + 2 * binary_index
chan_mask = np.zeros(packet_size, dtype="bool")
chan_mask[num_bytes] = True
chan_mask[num_bytes + 1] = True
self._mask_channels_bytes[stream_id].append(chan_mask)

# make mask as array (used in _get_analogsignal_chunk(...))
self._mask_streams = {}
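The per-channel mask arithmetic above can be sketched in isolation. It assumes the ephys region occupies the last 2 * num_ephy_channels bytes of each packet, as the header layout describes; `ephys_byte_mask` is an illustrative name, not part of the reader:

```python
import numpy as np

def ephys_byte_mask(packet_size: int, num_ephy_channels: int, binary_index: int) -> np.ndarray:
    """Boolean mask over one packet selecting the two int16 bytes of the
    channel stored at position `binary_index` of the ephys region (the
    last 2 * num_ephy_channels bytes of the packet)."""
    start = packet_size - 2 * num_ephy_channels + 2 * binary_index
    mask = np.zeros(packet_size, dtype=bool)
    mask[start : start + 2] = True
    return mask
```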
30 changes: 30 additions & 0 deletions neo/test/rawiotest/test_spikegadgetsrawio.py
@@ -17,6 +17,7 @@ class TestSpikeGadgetsRawIO(
"spikegadgets/20210225_em8_minirec2_ac.rec",
"spikegadgets/W122_06_09_2019_1_fromSD.rec",
"spikegadgets/SpikeGadgets_test_data_2xNpix1.0_20240318_173658.rec",
"spikegadgets/neuropixels2_4shank/20260122_134412_merged_cropped_1min_NP2.rec",
]

def test_parse_header_missing_channels(self):
@@ -53,6 +54,35 @@ def test_parse_header_missing_channels(self):
# fmt: on
)

def test_neuropixels_uses_hwchan_ids(self):
# Regression test for Neuropixels channel id and name semantics.
# ids are f"hwChan{i}" and names are f"probe{spikeSortingGroup}_chan{i}",
# where i is the channel index in the trodes stream (which equals the hwChan
# the firmware writes at that byte position, since the SpikeGadgets MCU emits
# Neuropixels samples in hwChan ascending order).
file_path = Path(
self.get_local_path("spikegadgets/SpikeGadgets_test_data_2xNpix1.0_20240318_173658.rec")
)
reader = SpikeGadgetsRawIO(filename=file_path)
reader.parse_header()

trodes_mask = reader.header["signal_channels"]["stream_id"] == "trodes"
trodes_ids = list(reader.header["signal_channels"]["id"][trodes_mask])
trodes_names = list(reader.header["signal_channels"]["name"][trodes_mask])

self.assertEqual(trodes_ids[:4], ["hwChan0", "hwChan1", "hwChan2", "hwChan3"])
self.assertEqual(trodes_ids[-4:], ["hwChan764", "hwChan765", "hwChan766", "hwChan767"])
self.assertEqual(len(trodes_ids), 768)

# Names embed the SpikeChannel spikeSortingGroup attribute. The two-probe NP1
# workspace sets spikeSortingGroup=0 for one probe and =1 for the other; the
# boundary is interleaved across channel indices (not at i=384) because the
# two probes' hwChans interleave in chip blocks.
self.assertEqual(trodes_names[0], "probe0_chan0")
self.assertEqual(trodes_names[767], "probe1_chan767")
groups = {n.split("_")[0] for n in trodes_names}
self.assertEqual(groups, {"probe0", "probe1"})

def test_opening_gibberish_file(self):
"""Test that parsing a file without </Configuration> raises ValueError instead of infinite loop."""
# Create a temporary file with gibberish content that doesn't have the required tag