@@ -2400,10 +2400,9 @@ static void SetArgBasedOnType(
24002400
24012401static ur_result_t SetKernelParamsAndLaunch (
24022402 queue_impl &Queue, std::vector<ArgDesc> &Args,
2403- const std::shared_ptr<device_image_impl> &DeviceImageImpl,
2404- ur_kernel_handle_t Kernel, NDRDescT &NDRDesc,
2405- std::vector<ur_event_handle_t > &RawEvents, detail::event_impl *OutEventImpl,
2406- const KernelArgMask *EliminatedArgMask,
2403+ device_image_impl *DeviceImageImpl, ur_kernel_handle_t Kernel,
2404+ NDRDescT &NDRDesc, std::vector<ur_event_handle_t > &RawEvents,
2405+ detail::event_impl *OutEventImpl, const KernelArgMask *EliminatedArgMask,
24072406 const std::function<void *(Requirement *Req)> &getMemAllocationFunc,
24082407 bool IsCooperative, bool KernelUsesClusterLaunch,
24092408 uint32_t WorkGroupMemorySize, const RTDeviceBinaryImage *BinImage,
@@ -2418,8 +2417,7 @@ static ur_result_t SetKernelParamsAndLaunch(
24182417 std::vector<unsigned char > Empty;
24192418 Kernel = Scheduler::getInstance ().completeSpecConstMaterialization (
24202419 Queue, BinImage, KernelName,
2421- DeviceImageImpl.get () ? DeviceImageImpl->get_spec_const_blob_ref ()
2422- : Empty);
2420+ DeviceImageImpl ? DeviceImageImpl->get_spec_const_blob_ref () : Empty);
24232421 }
24242422
24252423 if (KernelFuncPtr && !KernelHasSpecialCaptures) {
@@ -2449,9 +2447,8 @@ static ur_result_t SetKernelParamsAndLaunch(
24492447 } else {
24502448 auto setFunc = [&Adapter, Kernel, &DeviceImageImpl, &getMemAllocationFunc,
24512449 &Queue](detail::ArgDesc &Arg, size_t NextTrueIndex) {
2452- SetArgBasedOnType (Adapter, Kernel, DeviceImageImpl.get (),
2453- getMemAllocationFunc, Queue.getContextImpl (), Arg,
2454- NextTrueIndex);
2450+ SetArgBasedOnType (Adapter, Kernel, DeviceImageImpl, getMemAllocationFunc,
2451+ Queue.getContextImpl (), Arg, NextTrueIndex);
24552452 };
24562453 applyFuncOnFilteredArgs (EliminatedArgMask, Args, setFunc);
24572454 }
@@ -2537,14 +2534,14 @@ static ur_result_t SetKernelParamsAndLaunch(
25372534 return Error;
25382535}
25392536
2540- static std::tuple<ur_kernel_handle_t , std::shared_ptr< device_image_impl> ,
2537+ static std::tuple<ur_kernel_handle_t , device_image_impl * ,
25412538 const KernelArgMask *>
25422539getCGKernelInfo (const CGExecKernel &CommandGroup, context_impl &ContextImpl,
25432540 device_impl &DeviceImpl,
25442541 std::vector<FastKernelCacheValPtr> &KernelCacheValsToRelease) {
25452542
25462543 ur_kernel_handle_t UrKernel = nullptr ;
2547- std::shared_ptr< device_image_impl> DeviceImageImpl = nullptr ;
2544+ device_image_impl * DeviceImageImpl = nullptr ;
25482545 const KernelArgMask *EliminatedArgMask = nullptr ;
25492546 kernel_bundle_impl *KernelBundleImplPtr = CommandGroup.MKernelBundle .get ();
25502547
@@ -2556,7 +2553,7 @@ getCGKernelInfo(const CGExecKernel &CommandGroup, context_impl &ContextImpl,
25562553 CommandGroup.MKernelName )
25572554 : std::shared_ptr<kernel_impl>{nullptr }) {
25582555 UrKernel = SyclKernelImpl->getHandleRef ();
2559- DeviceImageImpl = SyclKernelImpl->getDeviceImage ();
2556+ DeviceImageImpl = & SyclKernelImpl->getDeviceImage ();
25602557 EliminatedArgMask = SyclKernelImpl->getKernelArgMask ();
25612558 } else {
25622559 FastKernelCacheValPtr FastKernelCacheVal =
@@ -2568,8 +2565,7 @@ getCGKernelInfo(const CGExecKernel &CommandGroup, context_impl &ContextImpl,
25682565 // To keep UrKernel valid, we return FastKernelCacheValPtr.
25692566 KernelCacheValsToRelease.push_back (std::move (FastKernelCacheVal));
25702567 }
2571- return std::make_tuple (UrKernel, std::move (DeviceImageImpl),
2572- EliminatedArgMask);
2568+ return std::make_tuple (UrKernel, DeviceImageImpl, EliminatedArgMask);
25732569}
25742570
25752571ur_result_t enqueueImpCommandBufferKernel (
@@ -2586,7 +2582,7 @@ ur_result_t enqueueImpCommandBufferKernel(
25862582 std::vector<FastKernelCacheValPtr> FastKernelCacheValsToRelease;
25872583
25882584 ur_kernel_handle_t UrKernel = nullptr ;
2589- std::shared_ptr< device_image_impl> DeviceImageImpl = nullptr ;
2585+ device_image_impl * DeviceImageImpl = nullptr ;
25902586 const KernelArgMask *EliminatedArgMask = nullptr ;
25912587
25922588 context_impl &ContextImpl = *sycl::detail::getSyclObjImpl (Ctx);
@@ -2610,10 +2606,10 @@ ur_result_t enqueueImpCommandBufferKernel(
26102606 }
26112607
26122608 adapter_impl &Adapter = ContextImpl.getAdapter ();
2613- auto SetFunc = [&Adapter, &UrKernel, &DeviceImageImpl , &ContextImpl ,
2614- &getMemAllocationFunc ](sycl::detail::ArgDesc &Arg,
2615- size_t NextTrueIndex) {
2616- sycl::detail::SetArgBasedOnType (Adapter, UrKernel, DeviceImageImpl. get () ,
2609+ auto SetFunc = [&Adapter, &UrKernel, &ContextImpl , &getMemAllocationFunc ,
2610+ DeviceImageImpl ](sycl::detail::ArgDesc &Arg,
2611+ size_t NextTrueIndex) {
2612+ sycl::detail::SetArgBasedOnType (Adapter, UrKernel, DeviceImageImpl,
26172613 getMemAllocationFunc, ContextImpl, Arg,
26182614 NextTrueIndex);
26192615 };
@@ -2695,7 +2691,7 @@ void enqueueImpKernel(
26952691 const KernelArgMask *EliminatedArgMask;
26962692
26972693 std::shared_ptr<kernel_impl> SyclKernelImpl;
2698- std::shared_ptr< device_image_impl> DeviceImageImpl;
2694+ device_image_impl * DeviceImageImpl = nullptr ;
26992695 FastKernelCacheValPtr KernelCacheVal;
27002696
27012697 if (nullptr != MSyclKernel) {
@@ -2717,7 +2713,7 @@ void enqueueImpKernel(
27172713 ? KernelBundleImplPtr->tryGetKernel (KernelName)
27182714 : std::shared_ptr<kernel_impl>{nullptr })) {
27192715 Kernel = SyclKernelImpl->getHandleRef ();
2720- DeviceImageImpl = SyclKernelImpl->getDeviceImage ();
2716+ DeviceImageImpl = & SyclKernelImpl->getDeviceImage ();
27212717
27222718 Program = DeviceImageImpl->get_ur_program ();
27232719
0 commit comments