Skip to content

Commit 40fc4ab

Browse files
committed
Honor copy-inducing kwargs in asarray for NDArray inputs
1 parent fbb0d55 commit 40fc4ab

2 files changed

Lines changed: 72 additions & 7 deletions

File tree

src/blosc2/ndarray.py

Lines changed: 47 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6327,6 +6327,32 @@ def save(array: NDArray, urlpath: str, contiguous=True, **kwargs: Any) -> None:
63276327
array.save(urlpath, contiguous, **kwargs)
63286328

63296329

6330+
def _ndarray_asarray_requires_copy(
    array: NDArray, dtype: np.dtype, chunks, blocks, user_kwargs: dict[str, Any]
) -> bool:
    """Tell whether ``asarray`` has to build a new array for an NDArray input.

    A copy is considered necessary when any of the following holds:

    * ``dtype`` differs from ``array.dtype``;
    * ``"chunks"`` / ``"blocks"`` is present in ``user_kwargs`` and the
      corresponding value differs from the partition of ``array``;
    * ``user_kwargs`` contains at least one of the keys in ``copy_keys``
      below.  Only the presence of the key is checked, never its value.

    Parameters
    ----------
    array: NDArray
        The input array; its ``dtype``, ``chunks`` and ``blocks`` attributes
        are read.
    dtype: np.dtype
        The dtype requested for the result.
    chunks, blocks: sequence of int or None
        The partitions requested for the result.  ``None`` is treated as
        "no preference" and never forces a copy.
    user_kwargs: dict
        The keyword arguments exactly as the caller supplied them.

    Returns
    -------
    out: bool
        True if a new array must be created, False if ``array`` can be
        returned unchanged.
    """
    if np.dtype(dtype) != np.dtype(array.dtype):
        return True

    # A partition only matters when the caller named it explicitly.  Compare
    # as tuples so that a list and a tuple with the same values are equal, and
    # skip None so that tuple(None) can never raise here.
    partitions = (
        ("chunks", chunks, array.chunks),
        ("blocks", blocks, array.blocks),
    )
    for key, requested, current in partitions:
        if key in user_kwargs and requested is not None and tuple(requested) != tuple(current):
            return True

    # Any of these keys being passed is taken as a request for a new array.
    copy_keys = {
        "cparams",
        "dparams",
        "meta",
        "urlpath",
        "contiguous",
        "mode",
        "mmap_mode",
        "initial_mapping_size",
        "storage",
        "out",
        "_chunksize_reduc_factor",
    }
    return not copy_keys.isdisjoint(user_kwargs)
6354+
6355+
63306356
def asarray(array: Sequence | blosc2.Array, copy: bool | None = None, **kwargs: Any) -> NDArray:
63316357
"""Convert the `array` to an `NDArray`.
63326358
@@ -6338,16 +6364,18 @@ def asarray(array: Sequence | blosc2.Array, copy: bool | None = None, **kwargs:
63386364
copy: bool | None, optional
63396365
Whether to copy the input. If True, the function copies.
63406366
If False, raise a ValueError if copy is necessary. If None and
6341-
input is NDArray, avoid copy by returning lazyexpr.
6367+
input is NDArray, return the original array when no dtype,
6368+
partition, or storage-related changes are requested.
63426369
Default: None.
63436370
63446371
kwargs: dict, optional
63456372
Keyword arguments that are supported by the :func:`empty` constructor.
63466373
63476374
Returns
63486375
-------
6349-
out: :ref:`NDArray` or :ref:`LazyExpr`
6350-
An new NDArray or LazyExpr made of :paramref:`array`.
6376+
out: :ref:`NDArray`
6377+
A new :ref:`NDArray` made of :paramref:`array`, or the original
6378+
array when a copy is not required.
63516379
63526380
Notes
63536381
-----
@@ -6365,15 +6393,19 @@ def asarray(array: Sequence | blosc2.Array, copy: bool | None = None, **kwargs:
63656393
>>> a = np.arange(0, np.prod(shape), dtype=np.int64).reshape(shape)
63666394
>>> # Create a NDArray from a NumPy array
63676395
>>> nda = blosc2.asarray(a)
6396+
>>> # NDArray inputs are returned as-is unless a copy is requested
6397+
>>> blosc2.asarray(nda) is nda
6398+
True
63686399
"""
6400+
user_kwargs = kwargs.copy()
63696401
# Convert scalars to numpy array
63706402
casting = kwargs.pop("casting", "unsafe")
63716403
if casting != "unsafe":
63726404
raise ValueError("Only unsafe casting is supported at the moment.")
63736405
if not hasattr(array, "shape"):
63746406
array = np.asarray(array) # defaults if dtype=None
63756407
dtype_ = blosc2.proxy.convert_dtype(array.dtype)
6376-
dtype = kwargs.pop("dtype", dtype_) # check if dtype provided
6408+
dtype = blosc2.proxy.convert_dtype(kwargs.pop("dtype", dtype_)) # check if dtype provided
63776409
kwargs = _check_ndarray_kwargs(**kwargs)
63786410
chunks = kwargs.pop("chunks", None)
63796411
blocks = kwargs.pop("blocks", None)
@@ -6385,9 +6417,17 @@ def asarray(array: Sequence | blosc2.Array, copy: bool | None = None, **kwargs:
63856417
if blocks is None and hasattr(array, "blocks") and isinstance(array.blocks, tuple | list):
63866418
blocks = array.blocks
63876419

6388-
copy = True if copy is None and not isinstance(array, NDArray) else copy
6420+
requires_copy = isinstance(array, NDArray) and _ndarray_asarray_requires_copy(
6421+
array, dtype, chunks, blocks, user_kwargs
6422+
)
6423+
if copy is None:
6424+
copy = not isinstance(array, NDArray) or requires_copy
6425+
elif copy is False and requires_copy:
6426+
raise ValueError(
6427+
"Cannot satisfy dtype, partition, or storage changes with copy=False for NDArray input."
6428+
)
63896429
if copy:
6390-
chunks, blocks = compute_chunks_blocks(array.shape, chunks, blocks, dtype_, **kwargs)
6430+
chunks, blocks = compute_chunks_blocks(array.shape, chunks, blocks, dtype, **kwargs)
63916431
# Fast path for small arrays. This is not too expensive in terms of memory consumption.
63926432
shape = array.shape
63936433
small_size = 2**24 # 16 MB
@@ -6402,7 +6442,7 @@ def asarray(array: Sequence | blosc2.Array, copy: bool | None = None, **kwargs:
64026442
return blosc2_ext.asarray(array, chunks, blocks, **kwargs)
64036443

64046444
# Create the empty array
6405-
ndarr = empty(shape, dtype_, chunks=chunks, blocks=blocks, **kwargs)
6445+
ndarr = empty(shape, dtype, chunks=chunks, blocks=blocks, **kwargs)
64066446
behaved = are_partitions_behaved(shape, chunks, blocks)
64076447

64086448
# Get the coordinates of the chunks

tests/ndarray/test_ndarray.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,31 @@ def test_asarray(a):
103103
np.testing.assert_allclose(a, b[:])
104104

105105

106+
def test_asarray_ndarray_persists_copy_when_urlpath_requested(tmp_path):
    """Passing ``urlpath`` for an NDArray input must yield a separate on-disk array."""
    source = blosc2.asarray(np.arange(10, dtype=np.int64), chunks=(5,), blocks=(2,))
    target = tmp_path / "persisted_copy.b2nd"

    on_disk = blosc2.asarray(source, urlpath=target, mode="w")

    # The result is a different object, backed by the requested file ...
    assert on_disk is not source
    assert on_disk.urlpath == str(target)
    assert target.exists()
    # ... and it holds the same values as the input.
    np.testing.assert_array_equal(on_disk[:], source[:])
116+
117+
118+
def test_asarray_ndarray_copies_for_dtype_changes_and_rejects_copy_false(tmp_path):
    """A dtype change on an NDArray input copies, and ``copy=False`` is refused.

    ``copy=False`` is checked for both a dtype change and a storage change,
    since either one makes a copy necessary.
    """
    array = blosc2.asarray(np.arange(10, dtype=np.int64), chunks=(5,), blocks=(2,))

    cast = blosc2.asarray(array, dtype=np.float32)

    assert cast is not array
    assert cast.dtype == np.float32
    np.testing.assert_allclose(cast[:], array[:].astype(np.float32))

    # The dtype change that succeeded above must be rejected once copying is forbidden.
    with pytest.raises(ValueError, match="copy=False"):
        blosc2.asarray(array, dtype=np.float32, copy=False)

    # A storage-related request (urlpath/mode) must be rejected in the same way.
    with pytest.raises(ValueError, match="copy=False"):
        blosc2.asarray(array, urlpath=tmp_path / "persisted_copy_false.b2nd", mode="w", copy=False)
129+
130+
106131
def test_ndarray_info_has_human_sizes():
107132
array = blosc2.asarray(np.arange(16, dtype=np.int32))
108133

0 commit comments

Comments
 (0)