33
44use std:: fmt:: Debug ;
55
6- use crate :: CudaBufferExt ;
7- use crate :: CudaDeviceBuffer ;
8- use crate :: executor:: CudaExecutionCtx ;
9- use crate :: executor:: { CudaArrayExt , CudaExecute } ;
10- use crate :: kernel:: patches:: gpu:: GPUPatches ;
11- use crate :: kernel:: patches:: types:: { DevicePatches , transpose_patches} ;
126use async_trait:: async_trait;
137use cudarc:: driver:: CudaFunction ;
148use cudarc:: driver:: DeviceRepr ;
@@ -24,15 +18,23 @@ use vortex::array::match_each_integer_ptype;
2418use vortex:: dtype:: NativePType ;
2519use vortex:: encodings:: fastlanes:: BitPacked ;
2620use vortex:: encodings:: fastlanes:: BitPackedArray ;
21+ use vortex:: encodings:: fastlanes:: BitPackedArrayExt ;
2722use vortex:: encodings:: fastlanes:: BitPackedDataParts ;
2823use vortex:: encodings:: fastlanes:: unpack_iter:: BitPacked as BitPackedUnpack ;
2924use vortex:: error:: VortexResult ;
3025use vortex:: error:: vortex_ensure;
3126use vortex:: error:: vortex_err;
32- use vortex_array:: arrays:: PatchedArray ;
33- use vortex_array:: arrays:: patched:: PatchedArraySlotsExt ;
3427use vortex_array:: patches:: Patches ;
3528
29+ use crate :: CudaBufferExt ;
30+ use crate :: CudaDeviceBuffer ;
31+ use crate :: executor:: CudaArrayExt ;
32+ use crate :: executor:: CudaExecute ;
33+ use crate :: executor:: CudaExecutionCtx ;
34+ use crate :: kernel:: patches:: gpu:: GPUPatches ;
35+ use crate :: kernel:: patches:: types:: DevicePatches ;
36+ use crate :: kernel:: patches:: types:: transpose_patches;
37+
3638/// CUDA decoder for bit-packed arrays.
///
/// Field-less unit type that serves as the `CudaExecute` implementor for
/// bit-packed input. Its execute path specializes the incoming array to a
/// `BitPackedArray` (returning an "Expected BitPackedArray" error when that
/// fails), wraps any patches on the array as `PatchKind::Interior` (or
/// `PatchKind::None` when absent), and dispatches to `decode_bitpacked` for
/// the array's integer ptype.
3739#[ derive( Debug ) ]
3840pub ( crate ) struct BitPackedExecutor ;
@@ -54,8 +56,13 @@ impl CudaExecute for BitPackedExecutor {
5456 let array =
5557 Self :: try_specialize ( array) . ok_or_else ( || vortex_err ! ( "Expected BitPackedArray" ) ) ?;
5658
59+ let patch_kind = match array. patches ( ) {
60+ Some ( patches) => PatchKind :: Interior ( patches) ,
61+ None => PatchKind :: None ,
62+ } ;
63+
5764 match_each_integer_ptype ! ( array. ptype( array. dtype( ) ) , |A | {
58- decode_bitpacked:: <A >( array, A :: default ( ) , ctx) . await
65+ decode_bitpacked:: <A >( array, A :: default ( ) , patch_kind , ctx) . await
5966 } )
6067 }
6168}
@@ -110,7 +117,7 @@ pub(crate) enum PatchKind {
110117
111118impl PatchKind {
112119 pub ( crate ) async fn execute (
113- mut self ,
120+ self ,
114121 ctx : & mut CudaExecutionCtx ,
115122 ) -> VortexResult < Option < DevicePatches > > {
116123 match self {
@@ -160,6 +167,7 @@ impl PatchKind {
160167pub ( crate ) async fn decode_bitpacked < A > (
161168 array : BitPackedArray ,
162169 reference : A ,
170+ patch_kind : PatchKind ,
163171 ctx : & mut CudaExecutionCtx ,
164172) -> VortexResult < Canonical >
165173where
@@ -171,7 +179,7 @@ where
171179 bit_width,
172180 len,
173181 packed,
174- patches,
182+ patches : _ ,
175183 validity,
176184 } = BitPacked :: into_parts ( array) ;
177185
@@ -192,12 +200,8 @@ where
192200 let cuda_function = bitpacked_cuda_kernel ( bit_width, output_width, ctx) ?;
193201 let config = bitpacked_cuda_launch_config ( output_width, len) ?;
194202
195- // We hold this here to keep the device buffers alive.
196- let device_patches = if let Some ( patches) = patches {
197- Some ( transpose_patches ( & patches, ctx) . await ?)
198- } else {
199- None
200- } ;
203+ // Execute the patch kind to get device patches
204+ let device_patches = patch_kind. execute ( ctx) . await ?;
201205
202206 let patches_arg = if let Some ( p) = & device_patches {
203207 GPUPatches {
0 commit comments