@@ -6106,19 +6106,31 @@ def fromiter(iterable, shape, dtype, c_order=True, **kwargs) -> NDArray:
61066106 return dst
61076107
61086108 if c_order or len (shape ) == 1 :
6109- # Read the entire iterator into a numpy array, then write to the destination.
6110- # This is O(total_size) in memory but requires only one pass over the
6111- # iterable and avoids any temporary on-disk file.
6112- buf = np .fromiter (iterable , dtype = dtype , count = total_size )
6113- dst [:] = buf .reshape (shape )
6109+ # --- Phase 3B: chunk-row buffering ---
6110+ # Process one "chunk row" at a time (all chunks sharing the same
6111+ # first-dimension chunk coordinate). This bounds peak memory to
6112+ # O(chunks[0] * prod(shape[1:])) instead of O(total_size), while
6113+ # still using only one np.fromiter call per chunk row.
6114+ #
6115+ # For a float64 (5 000, 5 000) array with chunks=(500, 500) this is
6116+ # 10 × 20 MB reads instead of one 200 MB allocation.
6117+ row_h = dst .chunks [0 ] # elements in dim-0 per chunk row
6118+ tail_nelems = math .prod (shape [1 :]) # elements per row in the remaining dims
6119+
6120+ for row_start in range (0 , shape [0 ], row_h ):
6121+ row_end = builtins .min (row_start + row_h , shape [0 ])
6122+ n = (row_end - row_start ) * tail_nelems
6123+ buf = np .fromiter (islice (iterable , n ), dtype = dtype , count = n )
6124+ dst [row_start :row_end ] = buf .reshape ((row_end - row_start ,) + shape [1 :])
61146125 else :
61156126 # --- Optimisation A: page-buffered chunk-insertion order ---
61166127 # Instead of calling np.fromiter once per chunk (O(n_chunks) calls),
61176128 # we pre-read a page of _PAGE_NELEMS elements at a time. This reduces
61186129 # call overhead from O(n_chunks) to O(total / _PAGE_NELEMS), which is
61196130 # decisive when chunks are small (e.g. (10, 10) → 10 000 chunks vs
61206131 # ~1 page for a 1 000 × 1 000 array).
6121- _PAGE_NELEMS = 1 << 20 # 1 M elements (~8 MB for float64)
6132+ _PAGE_BYTES = 8 << 20 # 8 MB target page size
6133+ _PAGE_NELEMS = builtins .max (1 , _PAGE_BYTES // dtype .itemsize )
61226134
61236135 page = np .empty (0 , dtype = dtype )
61246136 page_start = 0 # index of the first unread element in `page`
0 commit comments