From 7ca58d26af17f35892d37b1effaa89bee10f01f0 Mon Sep 17 00:00:00 2001 From: Sergey Semenov Date: Mon, 16 Jun 2025 06:21:00 -0700 Subject: [PATCH 01/12] [SYCL] Refactor kernel name based cache approach - Make the cache presence unconditional by looking them up at runtime as a fallback. This consolidates the if branches into one and saves us a couple of map lookups with old applications. - Switch to eager initialization of cache entries. - Add cleanup of cache instances when unloading a library. --- .../sycl/detail/kernel_name_based_cache.hpp | 14 ++++- .../include/sycl/detail/kernel_name_str_t.hpp | 2 + sycl/include/sycl/handler.hpp | 3 +- sycl/source/CMakeLists.txt | 1 + sycl/source/detail/global_handler.cpp | 2 + sycl/source/detail/global_handler.hpp | 6 ++ sycl/source/detail/graph_impl.cpp | 3 +- .../source/detail/kernel_name_based_cache.cpp | 9 +++ .../detail/kernel_name_based_cache_t.hpp | 35 +++++++++--- sycl/source/detail/kernel_program_cache.hpp | 50 +++++------------ .../program_manager/program_manager.cpp | 55 ++++++++++--------- .../program_manager/program_manager.hpp | 32 ++++++----- sycl/source/detail/queue_impl.hpp | 4 +- sycl/source/detail/scheduler/commands.cpp | 18 +++--- sycl/source/handler.cpp | 16 ++++-- 15 files changed, 147 insertions(+), 103 deletions(-) diff --git a/sycl/include/sycl/detail/kernel_name_based_cache.hpp b/sycl/include/sycl/detail/kernel_name_based_cache.hpp index 6bd2e38edc8e7..faf912c4173b0 100644 --- a/sycl/include/sycl/detail/kernel_name_based_cache.hpp +++ b/sycl/include/sycl/detail/kernel_name_based_cache.hpp @@ -8,19 +8,27 @@ #pragma once #include +#include namespace sycl { inline namespace _V1 { namespace detail { struct KernelNameBasedCacheT; + +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES __SYCL_EXPORT KernelNameBasedCacheT *createKernelNameBasedCache(); +#endif +__SYCL_EXPORT KernelNameBasedCacheT * +createKernelNameBasedCache(detail::ABINeutralKernelNameStrRefT MKernelName); // Retrieves a cache pointer unique to a kernel name type that can be used to // avoid kernel name based lookup in the runtime. -template -KernelNameBasedCacheT *getKernelNameBasedCache() { - static KernelNameBasedCacheT *Instance = createKernelNameBasedCache(); +template +KernelNameBasedCacheT * +getKernelNameBasedCache(detail::ABINeutralKernelNameStrRefT KernelName) { + static KernelNameBasedCacheT *Instance = + createKernelNameBasedCache(KernelName); return Instance; } diff --git a/sycl/include/sycl/detail/kernel_name_str_t.hpp b/sycl/include/sycl/detail/kernel_name_str_t.hpp index e0079ffb09c7e..b8ceb395431d4 100644 --- a/sycl/include/sycl/detail/kernel_name_str_t.hpp +++ b/sycl/include/sycl/detail/kernel_name_str_t.hpp @@ -18,10 +18,12 @@ namespace detail { using KernelNameStrT = std::string_view; using KernelNameStrRefT = std::string_view; using ABINeutralKernelNameStrT = detail::string_view; +using ABINeutralKernelNameStrRefT = detail::string_view; #else using KernelNameStrT = std::string; using KernelNameStrRefT = const std::string &; using ABINeutralKernelNameStrT = detail::string; +using ABINeutralKernelNameStrRefT = const detail::string &; #endif inline KernelNameStrT toKernelNameStrT(const ABINeutralKernelNameStrT &str) { diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 73842520d2e93..20fb3f192791e 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -859,6 +859,8 @@ class __SYCL_EXPORT handler { constexpr std::string_view KernelNameStr = detail::getKernelName(); MKernelName = KernelNameStr; + setKernelNameBasedCachePtr( + detail::getKernelNameBasedCache(KernelNameStr)); } else { // In case w/o the integration header it is necessary to process // accessors from the list(which are associated with this handler) as @@ -866,7 +868,6 @@ class __SYCL_EXPORT handler { // later during finalize. setArgsToAssociatedAccessors(); } - setKernelNameBasedCachePtr(detail::getKernelNameBasedCache()); // If the kernel lambda is callable with a kernel_handler argument, manifest // the associated kernel handler. diff --git a/sycl/source/CMakeLists.txt b/sycl/source/CMakeLists.txt index f2e5494fb6218..9fff541762eca 100644 --- a/sycl/source/CMakeLists.txt +++ b/sycl/source/CMakeLists.txt @@ -286,6 +286,7 @@ set(SYCL_COMMON_SOURCES "detail/kernel_compiler/kernel_compiler_sycl.cpp" "detail/kernel_impl.cpp" "detail/kernel_name_based_cache.cpp" + "detail/kernel_name_based_cache_t.cpp" "detail/kernel_program_cache.cpp" "detail/memory_manager.cpp" "detail/pipes.cpp" diff --git a/sycl/source/detail/global_handler.cpp b/sycl/source/detail/global_handler.cpp index 8c672589f7f59..d3c6894a8f117 100644 --- a/sycl/source/detail/global_handler.cpp +++ b/sycl/source/detail/global_handler.cpp @@ -253,12 +253,14 @@ ThreadPool &GlobalHandler::getHostTaskThreadPool() { return TP; } +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES KernelNameBasedCacheT *GlobalHandler::createKernelNameBasedCache() { static std::deque &KernelNameBasedCaches = getOrCreate(MKernelNameBasedCaches); LockGuard LG{MKernelNameBasedCaches.Lock}; return &KernelNameBasedCaches.emplace_back(); } +#endif void GlobalHandler::releaseDefaultContexts() { // Release shared-pointers to SYCL objects. diff --git a/sycl/source/detail/global_handler.hpp b/sycl/source/detail/global_handler.hpp index 8c66f5a8dcd8d..53bf23df4e6ba 100644 --- a/sycl/source/detail/global_handler.hpp +++ b/sycl/source/detail/global_handler.hpp @@ -11,7 +11,9 @@ #include #include +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES #include +#endif #include #include @@ -75,7 +77,9 @@ class GlobalHandler { ods_target_list &getOneapiDeviceSelectorTargets(const std::string &InitValue); XPTIRegistry &getXPTIRegistry(); ThreadPool &getHostTaskThreadPool(); +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES KernelNameBasedCacheT *createKernelNameBasedCache(); +#endif static void registerStaticVarShutdownHandler(); bool isOkToDefer() const; @@ -131,7 +135,9 @@ class GlobalHandler { InstWithLock MXPTIRegistry; // Thread pool for host task and event callbacks execution InstWithLock MHostTaskThreadPool; +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES InstWithLock> MKernelNameBasedCaches; +#endif }; } // namespace detail } // namespace _V1 diff --git a/sycl/source/detail/graph_impl.cpp b/sycl/source/detail/graph_impl.cpp index 9eec5d32c03fe..67e2490c7a88c 100644 --- a/sycl/source/detail/graph_impl.cpp +++ b/sycl/source/detail/graph_impl.cpp @@ -1539,9 +1539,10 @@ void exec_graph_impl::populateURKernelUpdateStructs( UrKernel = SyclKernelImpl->getHandleRef(); EliminatedArgMask = SyclKernelImpl->getKernelArgMask(); } else { + assert(ExecCG.MKernelNameBasedCachePtr); BundleObjs = sycl::detail::ProgramManager::getInstance().getOrCreateKernel( *ContextImpl, DeviceImpl, ExecCG.MKernelName, - ExecCG.MKernelNameBasedCachePtr); + *ExecCG.MKernelNameBasedCachePtr); UrKernel = BundleObjs->MKernelHandle; EliminatedArgMask = BundleObjs->MKernelArgMask; } diff --git a/sycl/source/detail/kernel_name_based_cache.cpp b/sycl/source/detail/kernel_name_based_cache.cpp index 17356e7f38fc8..8c2f8c5c68720 100644 --- a/sycl/source/detail/kernel_name_based_cache.cpp +++ b/sycl/source/detail/kernel_name_based_cache.cpp @@ -7,15 +7,24 @@ //===----------------------------------------------------------------------===// #include +#include #include namespace sycl { inline namespace _V1 { namespace detail { +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES KernelNameBasedCacheT *createKernelNameBasedCache() { return GlobalHandler::instance().createKernelNameBasedCache(); } +#endif + +KernelNameBasedCacheT * +createKernelNameBasedCache(detail::ABINeutralKernelNameStrRefT KernelName) { + return ProgramManager::getInstance().createKernelNameBasedCache( + KernelName.data()); +} } // namespace detail } // namespace _V1 diff --git a/sycl/source/detail/kernel_name_based_cache_t.hpp b/sycl/source/detail/kernel_name_based_cache_t.hpp index 33632a73a8e66..056a735651d1e 100644 --- a/sycl/source/detail/kernel_name_based_cache_t.hpp +++ b/sycl/source/detail/kernel_name_based_cache_t.hpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -83,13 +84,33 @@ struct FastKernelSubcacheT { FastKernelSubcacheMutexT Mutex; }; -struct KernelNameBasedCacheT { - FastKernelSubcacheT FastKernelSubcache; - std::optional UsesAssert; - // Implicit local argument position is represented by an optional int, this - // uses another optional on top of that to represent lazy initialization of - // the cached value. - std::optional> ImplicitLocalArgPos; +// This class is used for caching kernel name based information. +// Pointers to instances of this class are stored in header function templates +// as a static variable to avoid repeated runtime lookup overhead. +class KernelNameBasedCacheT { +public: +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES + KernelNameBasedCacheT() = default; +#endif + KernelNameBasedCacheT(KernelNameStrRefT KernelName); + + void init(KernelNameStrRefT KernelName); +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES + void initIfNeeded(KernelNameStrRefT KernelName); +#endif + FastKernelSubcacheT &getKernelSubcache(); + bool usesAssert(); + const std::optional &getImplicitLocalArgPos(); + +private: + void assertInitialized(); + +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES + std::atomic MInitialized = false; +#endif + FastKernelSubcacheT MFastKernelSubcache; + bool MUsesAssert; + std::optional MImplicitLocalArgPos; }; } // namespace detail diff --git a/sycl/source/detail/kernel_program_cache.hpp b/sycl/source/detail/kernel_program_cache.hpp index 99932d5207cec..b193125f08cb2 100644 --- a/sycl/source/detail/kernel_program_cache.hpp +++ b/sycl/source/detail/kernel_program_cache.hpp @@ -229,25 +229,18 @@ class KernelProgramCache { class FastKernelSubcacheWrapper { public: - FastKernelSubcacheWrapper(FastKernelSubcacheT *CachePtr, + FastKernelSubcacheWrapper(FastKernelSubcacheT &Subcache, ur_context_handle_t UrContext) - : MSubcachePtr{CachePtr}, MUrContext{UrContext} { - if (!MSubcachePtr) { - MOwnsSubcache = true; - MSubcachePtr = new FastKernelSubcacheT(); - } - } + : MSubcachePtr{&Subcache}, MUrContext{UrContext} {} FastKernelSubcacheWrapper(const FastKernelSubcacheWrapper &) = delete; FastKernelSubcacheWrapper(FastKernelSubcacheWrapper &&Other) - : MSubcachePtr{Other.MSubcachePtr}, MOwnsSubcache{Other.MOwnsSubcache}, - MUrContext{Other.MUrContext} { + : MSubcachePtr{Other.MSubcachePtr}, MUrContext{Other.MUrContext} { Other.MSubcachePtr = nullptr; } FastKernelSubcacheWrapper & operator=(const FastKernelSubcacheWrapper &) = delete; FastKernelSubcacheWrapper &operator=(FastKernelSubcacheWrapper &&Other) { MSubcachePtr = Other.MSubcachePtr; - MOwnsSubcache = Other.MOwnsSubcache; MUrContext = Other.MUrContext; Other.MSubcachePtr = nullptr; return *this; @@ -257,11 +250,6 @@ class KernelProgramCache { if (!MSubcachePtr) return; - if (MOwnsSubcache) { - delete MSubcachePtr; - return; - } - // Single subcache might be used by different contexts. // Remove all entries from the subcache that are associated with the // current context. @@ -277,8 +265,7 @@ class KernelProgramCache { FastKernelSubcacheT &get() { return *MSubcachePtr; } private: - FastKernelSubcacheT *MSubcachePtr = nullptr; - bool MOwnsSubcache = false; + FastKernelSubcacheT *MSubcachePtr; ur_context_handle_t MUrContext = nullptr; }; @@ -467,18 +454,9 @@ class KernelProgramCache { FastKernelCacheValPtr tryToGetKernelFast(KernelNameStrRefT KernelName, ur_device_handle_t Device, - FastKernelSubcacheT *KernelSubcacheHint) { - FastKernelCacheWriteLockT Lock(MFastKernelCacheMutex); - if (!KernelSubcacheHint) { - auto It = MFastKernelCache.try_emplace( - KernelName, - FastKernelSubcacheWrapper(KernelSubcacheHint, getURContext())); - KernelSubcacheHint = &It.first->second.get(); - } - - const FastKernelSubcacheEntriesT &SubcacheEntries = - KernelSubcacheHint->Entries; - FastKernelSubcacheReadLockT SubcacheLock{KernelSubcacheHint->Mutex}; + FastKernelSubcacheT &KernelSubcache) { + const FastKernelSubcacheEntriesT &SubcacheEntries = KernelSubcache.Entries; + FastKernelSubcacheReadLockT SubcacheLock{KernelSubcache.Mutex}; ur_context_handle_t Context = getURContext(); const FastKernelCacheKeyT RequiredKey(Device, Context); // Search for the kernel in the subcache. @@ -496,7 +474,7 @@ class KernelProgramCache { void saveKernel(KernelNameStrRefT KernelName, ur_device_handle_t Device, const FastKernelCacheValPtr &CacheVal, - FastKernelSubcacheT *KernelSubcacheHint) { + FastKernelSubcacheT &KernelSubcache) { if (SYCLConfig:: isProgramCacheEvictionEnabled()) { // Save kernel in fast cache only if the corresponding program is also @@ -516,15 +494,13 @@ class KernelProgramCache { // if no insertion took place, then some other thread has already inserted // smth in the cache traceKernel("Kernel inserted.", KernelName, true); - auto It = MFastKernelCache.try_emplace( - KernelName, - FastKernelSubcacheWrapper(KernelSubcacheHint, getURContext())); - KernelSubcacheHint = &It.first->second.get(); + MFastKernelCache.try_emplace( + KernelName, FastKernelSubcacheWrapper(KernelSubcache, getURContext())); - FastKernelSubcacheWriteLockT SubcacheLock{KernelSubcacheHint->Mutex}; + FastKernelSubcacheWriteLockT SubcacheLock{KernelSubcache.Mutex}; ur_context_handle_t Context = getURContext(); - KernelSubcacheHint->Entries.emplace_back( - FastKernelCacheKeyT(Device, Context), CacheVal); + KernelSubcache.Entries.emplace_back(FastKernelCacheKeyT(Device, Context), + CacheVal); } // Expects locked program cache diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 86738958bb0f5..fc22cc25519c5 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -1127,8 +1127,8 @@ ur_program_handle_t ProgramManager::getBuiltURProgram( FastKernelCacheValPtr ProgramManager::getOrCreateKernel( context_impl &ContextImpl, device_impl &DeviceImpl, - KernelNameStrRefT KernelName, - KernelNameBasedCacheT *KernelNameBasedCachePtr, const NDRDescT &NDRDesc) { + KernelNameStrRefT KernelName, KernelNameBasedCacheT &KernelNameBasedCache, + const NDRDescT &NDRDesc) { if constexpr (DbgProgMgr > 0) { std::cerr << ">>> ProgramManager::getOrCreateKernel(" << &ContextImpl << ", " << &DeviceImpl << ", " << KernelName << ")\n"; @@ -1138,12 +1138,9 @@ FastKernelCacheValPtr ProgramManager::getOrCreateKernel( KernelProgramCache &Cache = ContextImpl.getKernelProgramCache(); ur_device_handle_t UrDevice = DeviceImpl.getHandleRef(); - FastKernelSubcacheT *CacheHintPtr = - KernelNameBasedCachePtr ? &KernelNameBasedCachePtr->FastKernelSubcache - : nullptr; if (SYCLConfig::get()) { - if (auto KernelCacheValPtr = - Cache.tryToGetKernelFast(KernelName, UrDevice, CacheHintPtr)) { + if (auto KernelCacheValPtr = Cache.tryToGetKernelFast( + KernelName, UrDevice, KernelNameBasedCache.getKernelSubcache())) { return KernelCacheValPtr; } } @@ -1200,7 +1197,8 @@ FastKernelCacheValPtr ProgramManager::getOrCreateKernel( // MKernelsPerProgramCache, we need to increase the ref count of the kernel. ContextImpl.getAdapter()->call( KernelArgMaskPair.first); - Cache.saveKernel(KernelName, UrDevice, ret_val, CacheHintPtr); + Cache.saveKernel(KernelName, UrDevice, ret_val, + KernelNameBasedCache.getKernelSubcache()); return ret_val; } @@ -1856,26 +1854,29 @@ void ProgramManager::cacheKernelImplicitLocalArg(RTDeviceBinaryImage &Img) { } } -std::optional ProgramManager::kernelImplicitLocalArgPos( - KernelNameStrRefT KernelName, - KernelNameBasedCacheT *KernelNameBasedCachePtr) const { - auto getLocalArgPos = [&]() -> std::optional { - auto it = m_KernelImplicitLocalArgPos.find(KernelName); - if (it != m_KernelImplicitLocalArgPos.end()) - return it->second; - return {}; - }; +std::optional +ProgramManager::kernelImplicitLocalArgPos(KernelNameStrRefT KernelName) const { + auto it = m_KernelImplicitLocalArgPos.find(KernelName); + if (it != m_KernelImplicitLocalArgPos.end()) + return it->second; + return {}; +} - if (!KernelNameBasedCachePtr) - return getLocalArgPos(); - std::optional> &ImplicitLocalArgPos = - KernelNameBasedCachePtr->ImplicitLocalArgPos; - if (!ImplicitLocalArgPos.has_value()) { - ImplicitLocalArgPos = getLocalArgPos(); - } - return ImplicitLocalArgPos.value(); +KernelNameBasedCacheT * +ProgramManager::createKernelNameBasedCache(KernelNameStrRefT KernelName) { + auto Result = m_KernelNameBasedCaches.try_emplace(KernelName, KernelName); + assert(Result.second && "Kernel name based cache instance already exists"); + return &Result.first->second; } +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES +KernelNameBasedCacheT * +ProgramManager::getOrCreateKernelNameBasedCache(KernelNameStrRefT KernelName) { + auto Result = m_KernelNameBasedCaches.try_emplace(KernelName, KernelName); + return &Result.first->second; +} +#endif + static bool isBfloat16DeviceLibImage(sycl_device_binary RawImg, uint32_t *LibVersion = nullptr) { sycl_device_binary_property_set ImgPS; @@ -2239,6 +2240,10 @@ void ProgramManager::removeImages(sycl_device_binaries DeviceBinary) { m_KernelIDs2BinImage.erase(It->second); m_KernelName2KernelIDs.erase(It); } + if (auto It = m_KernelNameBasedCaches.find(EntriesIt->GetName()); + It != m_KernelNameBasedCaches.end()) { + m_KernelNameBasedCaches.erase(It); + } } // Drop reverse mapping diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index 7d0d7d01b86c8..060fb603ef989 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -200,7 +200,7 @@ class ProgramManager { FastKernelCacheValPtr getOrCreateKernel(context_impl &ContextImpl, device_impl &DeviceImpl, KernelNameStrRefT KernelName, - KernelNameBasedCacheT *KernelNameBasedCachePtr, + KernelNameBasedCacheT &KernelNameBasedCache, const NDRDescT &NDRDesc = {}); ur_kernel_handle_t getCachedMaterializedKernel( @@ -358,23 +358,22 @@ class ProgramManager { ~ProgramManager() = default; template - bool kernelUsesAssert(const NameT &KernelName, - KernelNameBasedCacheT *KernelNameBasedCachePtr) const { - if (!KernelNameBasedCachePtr) - return m_KernelUsesAssert.find(KernelName) != m_KernelUsesAssert.end(); - - std::optional &UsesAssert = KernelNameBasedCachePtr->UsesAssert; - if (!UsesAssert.has_value()) - UsesAssert = - m_KernelUsesAssert.find(KernelName) != m_KernelUsesAssert.end(); - return UsesAssert.value(); + bool kernelUsesAssert(const NameT &KernelName) const { + return m_KernelUsesAssert.find(KernelName) != m_KernelUsesAssert.end(); } SanitizerType kernelUsesSanitizer() const { return m_SanitizerFoundInImage; } - std::optional kernelImplicitLocalArgPos( - KernelNameStrRefT KernelName, - KernelNameBasedCacheT *KernelNameBasedCachePtr) const; + std::optional + kernelImplicitLocalArgPos(KernelNameStrRefT KernelName) const; + + KernelNameBasedCacheT * + createKernelNameBasedCache(KernelNameStrRefT KernelName); + +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES + KernelNameBasedCacheT * + getOrCreateKernelNameBasedCache(KernelNameStrRefT KernelName); +#endif std::set getRawDeviceImages(const std::vector &KernelIDs); @@ -526,6 +525,11 @@ class ProgramManager { KernelUsesAssertSet m_KernelUsesAssert; std::unordered_map m_KernelImplicitLocalArgPos; + // Map for storing kernel name based caches. Runtime lookup should only be + // performed for ABI compatibility and user library unloading. + std::unordered_map + m_KernelNameBasedCaches; + // Sanitizer type used in device image SanitizerType m_SanitizerFoundInImage; diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index a79884f2c46d5..5920c8794f961 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -870,9 +870,7 @@ class queue_impl : public std::enable_shared_from_this { // Kernel only uses assert if it's non interop one KernelUsesAssert = (!Handler.MKernel || Handler.MKernel->hasSYCLMetadata()) && - ProgramManager::getInstance().kernelUsesAssert( - Handler.MKernelName.data(), - Handler.impl->MKernelNameBasedCachePtr); + Handler.impl->MKernelNameBasedCachePtr->usesAssert(); auto &PostProcess = *PostProcessorFunc; PostProcess(IsKernel, KernelUsesAssert, Event); diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index d376db2d398c8..b9df039182661 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -2014,10 +2014,11 @@ void instrumentationAddExtraKernelMetadata( // NOTE: Queue can be null when kernel is directly enqueued to a command // buffer // by graph API, when a modifiable graph is finalized. + assert(KernelNameBasedCachePtr); FastKernelCacheValPtr FastKernelCacheVal = detail::ProgramManager::getInstance().getOrCreateKernel( *Queue->getContextImplPtr(), Queue->getDeviceImpl(), KernelName, - KernelNameBasedCachePtr); + *KernelNameBasedCachePtr); EliminatedArgMask = FastKernelCacheVal->MKernelArgMask; } @@ -2446,9 +2447,8 @@ static ur_result_t SetKernelParamsAndLaunch( applyFuncOnFilteredArgs(EliminatedArgMask, Args, setFunc); } - std::optional ImplicitLocalArg = - ProgramManager::getInstance().kernelImplicitLocalArgPos( - KernelName, KernelNameBasedCachePtr); + const std::optional &ImplicitLocalArg = + KernelNameBasedCachePtr->getImplicitLocalArgPos(); // Set the implicit local memory buffer to support // get_work_group_scratch_memory. This is for backend not supporting // CUDA-style local memory setting. Note that we may have -1 as a position, @@ -2550,10 +2550,11 @@ getCGKernelInfo(const CGExecKernel &CommandGroup, ContextImplPtr ContextImpl, DeviceImageImpl = SyclKernelImpl->getDeviceImage(); EliminatedArgMask = SyclKernelImpl->getKernelArgMask(); } else { + assert(CommandGroup.MKernelNameBasedCachePtr); FastKernelCacheValPtr FastKernelCacheVal = sycl::detail::ProgramManager::getInstance().getOrCreateKernel( *ContextImpl, DeviceImpl, CommandGroup.MKernelName, - CommandGroup.MKernelNameBasedCachePtr); + *CommandGroup.MKernelNameBasedCachePtr); UrKernel = FastKernelCacheVal->MKernelHandle; EliminatedArgMask = FastKernelCacheVal->MKernelArgMask; // To keep UrKernel valid, we return FastKernelCacheValPtr. @@ -2714,8 +2715,10 @@ void enqueueImpKernel( EliminatedArgMask = SyclKernelImpl->getKernelArgMask(); KernelMutex = SyclKernelImpl->getCacheMutex(); } else { + assert(KernelNameBasedCachePtr); KernelCacheVal = detail::ProgramManager::getInstance().getOrCreateKernel( - *ContextImpl, DeviceImpl, KernelName, KernelNameBasedCachePtr, NDRDesc); + *ContextImpl, DeviceImpl, KernelName, *KernelNameBasedCachePtr, + NDRDesc); Kernel = KernelCacheVal->MKernelHandle; KernelMutex = KernelCacheVal->MMutex; Program = KernelCacheVal->MProgramHandle; @@ -3250,8 +3253,7 @@ ur_result_t ExecCGCommand::enqueueImpQueue() { // Kernel only uses assert if it's non interop one bool KernelUsesAssert = (!SyclKernel || SyclKernel->hasSYCLMetadata()) && - ProgramManager::getInstance().kernelUsesAssert( - KernelName, ExecKernel->MKernelNameBasedCachePtr); + ExecKernel->MKernelNameBasedCachePtr->usesAssert(); if (KernelUsesAssert) { EventImpl = MEvent.get(); } diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 8f55572622a70..aa55a0f7fa89e 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -461,6 +461,16 @@ event handler::finalize() { } if (type == detail::CGType::Kernel) { +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES + if (impl->MKernelNameBasedCachePtr) { + impl->MKernelNameBasedCachePtr->initIfNeeded( + toKernelNameStrT(MKernelName)); + } else { + impl->MKernelNameBasedCachePtr = + detail::ProgramManager::getInstance().getOrCreateKernelNameBasedCache( + toKernelNameStrT(MKernelName)); + } +#endif // If there were uses of set_specialization_constant build the kernel_bundle std::shared_ptr KernelBundleImpPtr = getOrInsertHandlerKernelBundle(/*Insert=*/false); @@ -538,10 +548,8 @@ event handler::finalize() { !impl->MEventNeeded && impl->get_queue().supportsDiscardingPiEvents(); if (DiscardEvent) { // Kernel only uses assert if it's non interop one - bool KernelUsesAssert = - !(MKernel && MKernel->isInterop()) && - detail::ProgramManager::getInstance().kernelUsesAssert( - toKernelNameStrT(MKernelName), impl->MKernelNameBasedCachePtr); + bool KernelUsesAssert = !(MKernel && MKernel->isInterop()) && + impl->MKernelNameBasedCachePtr->usesAssert(); DiscardEvent = !KernelUsesAssert; } From 237238cf5cb62db5df477674b429624180ea1b3c Mon Sep 17 00:00:00 2001 From: Sergey Semenov Date: Wed, 25 Jun 2025 04:06:48 -0700 Subject: [PATCH 02/12] Add missing file --- .../detail/kernel_name_based_cache_t.cpp | 56 +++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 sycl/source/detail/kernel_name_based_cache_t.cpp diff --git a/sycl/source/detail/kernel_name_based_cache_t.cpp b/sycl/source/detail/kernel_name_based_cache_t.cpp new file mode 100644 index 0000000000000..7656fb8339c67 --- /dev/null +++ b/sycl/source/detail/kernel_name_based_cache_t.cpp @@ -0,0 +1,56 @@ +//==-------------------- kernel_name_based_cache_t.cpp ---------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include +#include + +namespace sycl { +inline namespace _V1 { +namespace detail { + +KernelNameBasedCacheT::KernelNameBasedCacheT(KernelNameStrRefT KernelName) { + init(KernelName); +} + +void KernelNameBasedCacheT::init(KernelNameStrRefT KernelName) { + auto &PM = detail::ProgramManager::getInstance(); + MUsesAssert = PM.kernelUsesAssert(KernelName); + MImplicitLocalArgPos = PM.kernelImplicitLocalArgPos(KernelName); +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES + MInitialized.store(true); +#endif +} + +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES +void KernelNameBasedCacheT::initIfNeeded(KernelNameStrRefT KernelName) { + if (!MInitialized.load()) + init(KernelName); +} +#endif + +FastKernelSubcacheT &KernelNameBasedCacheT::getKernelSubcache() { + assertInitialized(); + return MFastKernelSubcache; +} +bool KernelNameBasedCacheT::usesAssert(){ + assertInitialized(); + return MUsesAssert; +} +const std::optional &KernelNameBasedCacheT::getImplicitLocalArgPos() { + assertInitialized(); + return MImplicitLocalArgPos; +} + +void KernelNameBasedCacheT::assertInitialized() { +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES + assert(MInitialized.load() && "Cache needs to be initialized before use"); +#endif + } + +} // namespace detail +} // namespace _V1 +} // namespace sycl \ No newline at end of file From 52ab4b204b3873c18bb992c32e859e08ac2cab86 Mon Sep 17 00:00:00 2001 From: Sergey Semenov Date: Wed, 25 Jun 2025 04:12:36 -0700 Subject: [PATCH 03/12] Apply clang-format --- sycl/source/detail/kernel_name_based_cache_t.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/sycl/source/detail/kernel_name_based_cache_t.cpp b/sycl/source/detail/kernel_name_based_cache_t.cpp index 7656fb8339c67..dd63d8d7d4d4b 100644 --- a/sycl/source/detail/kernel_name_based_cache_t.cpp +++ b/sycl/source/detail/kernel_name_based_cache_t.cpp @@ -13,15 +13,15 @@ inline namespace _V1 { namespace detail { KernelNameBasedCacheT::KernelNameBasedCacheT(KernelNameStrRefT KernelName) { - init(KernelName); + init(KernelName); } void KernelNameBasedCacheT::init(KernelNameStrRefT KernelName) { - auto &PM = detail::ProgramManager::getInstance(); - MUsesAssert = PM.kernelUsesAssert(KernelName); - MImplicitLocalArgPos = PM.kernelImplicitLocalArgPos(KernelName); + auto &PM = detail::ProgramManager::getInstance(); + MUsesAssert = PM.kernelUsesAssert(KernelName); + MImplicitLocalArgPos = PM.kernelImplicitLocalArgPos(KernelName); #ifndef __INTEL_PREVIEW_BREAKING_CHANGES - MInitialized.store(true); + MInitialized.store(true); #endif } @@ -36,7 +36,7 @@ FastKernelSubcacheT &KernelNameBasedCacheT::getKernelSubcache() { assertInitialized(); return MFastKernelSubcache; } -bool KernelNameBasedCacheT::usesAssert(){ +bool KernelNameBasedCacheT::usesAssert() { assertInitialized(); return MUsesAssert; } @@ -49,7 +49,7 @@ void KernelNameBasedCacheT::assertInitialized() { #ifndef __INTEL_PREVIEW_BREAKING_CHANGES assert(MInitialized.load() && "Cache needs to be initialized before use"); #endif - } +} } // namespace detail } // namespace _V1 From fd61cc2b72ff7d5ca75c0b41fa51d4229a9f5790 Mon Sep 17 00:00:00 2001 From: Sergey Semenov Date: Wed, 25 Jun 2025 04:23:44 -0700 Subject: [PATCH 04/12] Fix struct/class warning --- sycl/include/sycl/detail/kernel_name_based_cache.hpp | 2 +- sycl/source/detail/global_handler.hpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/include/sycl/detail/kernel_name_based_cache.hpp b/sycl/include/sycl/detail/kernel_name_based_cache.hpp index faf912c4173b0..b005dc4d29477 100644 --- a/sycl/include/sycl/detail/kernel_name_based_cache.hpp +++ b/sycl/include/sycl/detail/kernel_name_based_cache.hpp @@ -14,7 +14,7 @@ namespace sycl { inline namespace _V1 { namespace detail { -struct KernelNameBasedCacheT; +class KernelNameBasedCacheT; #ifndef __INTEL_PREVIEW_BREAKING_CHANGES __SYCL_EXPORT KernelNameBasedCacheT *createKernelNameBasedCache(); diff --git a/sycl/source/detail/global_handler.hpp b/sycl/source/detail/global_handler.hpp index 53bf23df4e6ba..21dbd34ae55a6 100644 --- a/sycl/source/detail/global_handler.hpp +++ b/sycl/source/detail/global_handler.hpp @@ -29,7 +29,7 @@ class Adapter; class ods_target_list; class XPTIRegistry; class ThreadPool; -struct KernelNameBasedCacheT; +class KernelNameBasedCacheT; using ContextImplPtr = std::shared_ptr; using AdapterPtr = std::shared_ptr; From 281a6a5442163c82a442827523b1f0e3a531b013 Mon Sep 17 00:00:00 2001 From: Sergey Semenov Date: Mon, 30 Jun 2025 04:07:11 -0700 Subject: [PATCH 05/12] Fix cleanup --- .../program_manager/program_manager.cpp | 80 +++++++++---------- 1 file changed, 40 insertions(+), 40 deletions(-) diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 98dbb165e1738..6dc5dfabe31ee 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -2193,46 +2193,6 @@ void ProgramManager::removeImages(sycl_device_binaries DeviceBinary) { // Drop the kernel argument mask map m_EliminatedKernelArgMasks.erase(Img); - // Unmap the unique kernel IDs for the offload entries - for (sycl_offload_entry EntriesIt = EntriesB; EntriesIt != EntriesE; - EntriesIt = EntriesIt->Increment()) { - - // Drop entry for service kernel - if (std::strstr(EntriesIt->GetName(), "__sycl_service_kernel__")) { - m_ServiceKernels.erase(EntriesIt->GetName()); - continue; - } - - // Exported device functions won't have a kernel ID - if (m_ExportedSymbolImages.find(EntriesIt->GetName()) != - m_ExportedSymbolImages.end()) { - continue; - } - - // remove everything associated with this KernelName - m_KernelUsesAssert.erase(EntriesIt->GetName()); - m_KernelImplicitLocalArgPos.erase(EntriesIt->GetName()); - - if (auto It = m_KernelName2KernelIDs.find(EntriesIt->GetName()); - It != m_KernelName2KernelIDs.end()) { - m_KernelIDs2BinImage.erase(It->second); - m_KernelName2KernelIDs.erase(It); - } - if (auto It = m_KernelNameBasedCaches.find(EntriesIt->GetName()); - It != m_KernelNameBasedCaches.end()) { - m_KernelNameBasedCaches.erase(It); - } - } - - // Drop reverse mapping - m_BinImg2KernelIDs.erase(Img); - - // Unregister exported symbols (needs to happen after the ID unmap loop) - for (const sycl_device_binary_property &ESProp : - Img->getExportedSymbols()) { - m_ExportedSymbolImages.erase(ESProp->Name); - } - for (const sycl_device_binary_property &VFProp : Img->getVirtualFunctions()) { std::string StrValue = DeviceBinaryProperty(VFProp).asCString(); @@ -2279,6 +2239,46 @@ void ProgramManager::removeImages(sycl_device_binaries DeviceBinary) { } } + // Unmap the unique kernel IDs for the offload entries + for (sycl_offload_entry EntriesIt = EntriesB; EntriesIt != EntriesE; + EntriesIt = EntriesIt->Increment()) { + + // Drop entry for service kernel + if (std::strstr(EntriesIt->GetName(), "__sycl_service_kernel__")) { + m_ServiceKernels.erase(EntriesIt->GetName()); + continue; + } + + // Exported device functions won't have a kernel ID + if (m_ExportedSymbolImages.find(EntriesIt->GetName()) != + m_ExportedSymbolImages.end()) { + continue; + } + + // remove everything associated with this KernelName + m_KernelUsesAssert.erase(EntriesIt->GetName()); + m_KernelImplicitLocalArgPos.erase(EntriesIt->GetName()); + + if (auto It = m_KernelName2KernelIDs.find(EntriesIt->GetName()); + It != m_KernelName2KernelIDs.end()) { + m_KernelIDs2BinImage.erase(It->second); + m_KernelName2KernelIDs.erase(It); + } + if (auto It = m_KernelNameBasedCaches.find(EntriesIt->GetName()); + It != m_KernelNameBasedCaches.end()) { + m_KernelNameBasedCaches.erase(It); + } + } + + // Drop reverse mapping + m_BinImg2KernelIDs.erase(Img); + + // Unregister exported symbols (needs to happen after the ID unmap loop) + for (const sycl_device_binary_property &ESProp : + Img->getExportedSymbols()) { + m_ExportedSymbolImages.erase(ESProp->Name); + } + m_DeviceImages.erase(DevImgIt); } } From a296d87e4e9bba688e2d1232c4e983aaec0d9584 Mon Sep 17 00:00:00 2001 From: Sergey Semenov Date: Mon, 30 Jun 2025 13:47:17 -0700 Subject: [PATCH 06/12] Apply clang-format --- sycl/source/detail/global_handler.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/sycl/source/detail/global_handler.cpp b/sycl/source/detail/global_handler.cpp index ef15e18a65f02..d0538851daf03 100644 --- a/sycl/source/detail/global_handler.cpp +++ b/sycl/source/detail/global_handler.cpp @@ -390,7 +390,6 @@ void shutdown_late() { Handler->MScheduler.Inst.reset(nullptr); Handler->MProgramManager.Inst.reset(nullptr); - #ifndef __INTEL_PREVIEW_BREAKING_CHANGES // Cache stores handles to the adapter, so clear it before // releasing adapters. From cb0b393b2c248066d78666bc42ea1b47c4c1756a Mon Sep 17 00:00:00 2001 From: Sergey Semenov Date: Tue, 1 Jul 2025 08:47:44 -0700 Subject: [PATCH 07/12] [SYCL][NFC] Split unit test kernels based on their args Most of the unit test kernels don't have any arguments, and those that do typically use a single accessor. Prior to this change, kernel size was passed as a template parameter of the kernel function type, while still sharing the same kernel name. This causes problems for the upcoming change to kernel name based caches, which will be tied to both types and strings. Since currently there are only 2 cases for such test kernels: no arguments and a single accessor, this patch simply adds a separate kernel for the latter case. --- .../Extensions/CommandGraph/Barrier.cpp | 118 +++++++++--------- .../Extensions/CommandGraph/CommandGraph.cpp | 62 ++++----- .../Extensions/CommandGraph/Exceptions.cpp | 20 +-- .../Extensions/CommandGraph/InOrderQueue.cpp | 66 +++++----- .../Extensions/CommandGraph/MultiThreaded.cpp | 24 ++-- .../Extensions/CommandGraph/Properties.cpp | 2 +- .../Extensions/CommandGraph/Queries.cpp | 12 +- .../Extensions/CommandGraph/Regressions.cpp | 12 +- .../Extensions/CommandGraph/Subgraph.cpp | 32 ++--- .../CommandGraph/TopologicalSort.cpp | 14 +-- .../Extensions/CommandGraph/Update.cpp | 18 +-- .../Extensions/EnqueueFunctionsEvents.cpp | 26 ++-- .../FreeFunctionCommands/Barrier.cpp | 8 +- sycl/unittests/Extensions/GetLastEvent.cpp | 2 +- .../unittests/Extensions/KernelProperties.cpp | 2 +- .../KernelQueries/SpillMemorySize.cpp | 4 +- sycl/unittests/Extensions/LaunchQueries.cpp | 2 +- sycl/unittests/SYCL2020/GetNativeOpenCL.cpp | 3 +- sycl/unittests/buffer/BufferLocation.cpp | 10 +- sycl/unittests/buffer/MemChannel.cpp | 3 +- sycl/unittests/buffer/SubbufferLargeSize.cpp | 2 +- sycl/unittests/event/EventDestruction.cpp | 24 ++-- sycl/unittests/helpers/TestKernel.hpp | 31 +++-- sycl/unittests/program_manager/SubDevices.cpp | 6 +- .../program_manager/itt_annotations.cpp | 4 +- sycl/unittests/queue/Barrier.cpp | 16 +-- sycl/unittests/queue/GetProfilingInfo.cpp | 4 +- .../scheduler/AccessorDefaultCtor.cpp | 3 +- .../scheduler/EnqueueWithDependsOnDeps.cpp | 6 +- sycl/unittests/scheduler/FailedCommands.cpp | 4 +- sycl/unittests/scheduler/GraphCleanup.cpp | 4 +- .../scheduler/HostTaskAndBarrier.cpp | 2 +- sycl/unittests/scheduler/InOrderQueueDeps.cpp | 10 +- .../scheduler/InOrderQueueHostTaskDeps.cpp | 4 +- sycl/unittests/scheduler/RequiredWGSize.cpp | 2 +- sycl/unittests/stream/stream.cpp | 4 +- sycl/unittests/xpti_trace/NodeCreation.cpp | 11 +- .../unittests/xpti_trace/QueueApiFailures.cpp | 15 ++- sycl/unittests/xpti_trace/QueueIDCheck.cpp | 6 +- 39 files changed, 302 insertions(+), 296 deletions(-) diff --git a/sycl/unittests/Extensions/CommandGraph/Barrier.cpp b/sycl/unittests/Extensions/CommandGraph/Barrier.cpp index 4d3d8532efb43..f0aea4f665853 100644 --- a/sycl/unittests/Extensions/CommandGraph/Barrier.cpp +++ b/sycl/unittests/Extensions/CommandGraph/Barrier.cpp @@ -14,19 +14,19 @@ using namespace sycl::ext::oneapi; TEST_F(CommandGraphTest, EnqueueBarrier) { Graph.begin_recording(Queue); auto Node1Graph = Queue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto Node2Graph = Queue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto Node3Graph = Queue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto Barrier = Queue.submit([&](sycl::handler &cgh) { cgh.ext_oneapi_barrier(); }); auto Node4Graph = Queue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto Node5Graph = Queue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); Graph.end_recording(Queue); auto GraphImpl = sycl::detail::getSyclObjImpl(Graph); @@ -55,19 +55,19 @@ TEST_F(CommandGraphTest, EnqueueBarrierMultipleQueues) { sycl::queue Queue2{Queue.get_context(), Dev}; Graph.begin_recording({Queue, Queue2}); auto Node1Graph = Queue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto Node2Graph = Queue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto Node3Graph = Queue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto Barrier = Queue2.submit( [&](sycl::handler &cgh) { cgh.ext_oneapi_barrier({Node2Graph}); }); auto Node4Graph = Queue2.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto Node5Graph = Queue2.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); Graph.end_recording(); auto GraphImpl = sycl::detail::getSyclObjImpl(Graph); @@ -118,21 +118,21 @@ TEST_F(CommandGraphTest, EnqueueBarrierMultipleQueues) { TEST_F(CommandGraphTest, EnqueueBarrierWaitList) { Graph.begin_recording(Queue); auto Node1Graph = Queue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto Node2Graph = Queue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto Node3Graph = Queue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto Barrier = Queue.submit([&](sycl::handler &cgh) { cgh.ext_oneapi_barrier({Node1Graph, Node2Graph}); }); auto Node4Graph = Queue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto Node5Graph = Queue.submit([&](sycl::handler &cgh) { cgh.depends_on(Node3Graph); - cgh.single_task>([]() {}); + cgh.single_task([]() {}); }); Graph.end_recording(Queue); @@ -167,11 +167,11 @@ TEST_F(CommandGraphTest, EnqueueBarrierWaitListMultipleQueues) { sycl::queue Queue2{Queue.get_context(), Dev}; Graph.begin_recording({Queue, Queue2}); auto Node1Graph = Queue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto Node2Graph = Queue2.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto Node3Graph = Queue2.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); // Node1Graph comes from Queue, and Node2Graph comes from Queue2 auto Barrier = Queue.submit([&](sycl::handler &cgh) { @@ -179,10 +179,10 @@ TEST_F(CommandGraphTest, EnqueueBarrierWaitListMultipleQueues) { }); auto Node4Graph = Queue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto Node5Graph = Queue.submit([&](sycl::handler &cgh) { cgh.depends_on(Node3Graph); - cgh.single_task>([]() {}); + cgh.single_task([]() {}); }); auto Barrier2 = Queue2.submit([&](sycl::handler &cgh) { @@ -223,32 +223,32 @@ TEST_F(CommandGraphTest, EnqueueBarrierWaitListMultipleQueues) { TEST_F(CommandGraphTest, EnqueueMultipleBarrier) { Graph.begin_recording(Queue); auto Node1Graph = Queue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto Node2Graph = Queue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto Node3Graph = Queue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto Barrier1 = Queue.submit([&](sycl::handler &cgh) { cgh.ext_oneapi_barrier({Node1Graph, Node2Graph}); }); auto Node4Graph = Queue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto Node5Graph = Queue.submit([&](sycl::handler &cgh) { cgh.depends_on(Node3Graph); - cgh.single_task>([]() {}); + cgh.single_task([]() {}); }); auto Barrier2 = Queue.submit([&](sycl::handler &cgh) { cgh.ext_oneapi_barrier(); }); auto Node6Graph = Queue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto Node7Graph = Queue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto Node8Graph = Queue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); Graph.end_recording(Queue); @@ -312,7 +312,7 @@ TEST_F(CommandGraphTest, InOrderQueueWithPreviousCommand) { sycl::queue InOrderQueue{Dev, Properties}; auto NonGraphEvent = InOrderQueue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); Graph.begin_recording(InOrderQueue); @@ -354,10 +354,10 @@ TEST_F(CommandGraphTest, InOrderQueuesWithBarrier) { Graph.begin_recording({InOrderQueue1, InOrderQueue2, InOrderQueue3}); auto Node1 = InOrderQueue1.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto Node2 = InOrderQueue2.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); InOrderQueue3.ext_oneapi_submit_barrier({Node1}); @@ -402,10 +402,10 @@ TEST_F(CommandGraphTest, InOrderQueuesWithBarrierWaitList) { Graph.begin_recording({InOrderQueue1, InOrderQueue2}); auto Node1 = InOrderQueue1.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto Node2 = InOrderQueue2.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto BarrierNode = InOrderQueue2.ext_oneapi_submit_barrier({Node1}); @@ -443,15 +443,15 @@ TEST_F(CommandGraphTest, InOrderQueuesWithEmptyBarrierWaitList) { Graph.begin_recording({InOrderQueue1, InOrderQueue2}); auto Node1 = InOrderQueue1.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto Node2 = InOrderQueue2.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto BarrierNode = InOrderQueue1.ext_oneapi_submit_barrier(); auto Node3 = InOrderQueue2.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); Graph.end_recording(); @@ -504,16 +504,16 @@ TEST_F(CommandGraphTest, BarrierMixedQueueTypes) { Graph.begin_recording({InOrderQueue, OutOfOrderQueue}); auto Node1 = OutOfOrderQueue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto Node2 = OutOfOrderQueue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto BarrierNode = InOrderQueue.ext_oneapi_submit_barrier({Node1, Node2}); auto Node3 = OutOfOrderQueue.submit([&](sycl::handler &cgh) { cgh.depends_on(Node2); - cgh.single_task>([]() {}); + cgh.single_task([]() {}); }); Graph.end_recording(); @@ -566,14 +566,14 @@ TEST_F(CommandGraphTest, BarrierBetweenExplicitNodes) { InOrderQueue}; auto Node1 = Graph.add( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); Graph.begin_recording(InOrderQueue); auto BarrierNode = InOrderQueue.ext_oneapi_submit_barrier(); Graph.end_recording(); auto Node2 = Graph.add( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }, + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }, {experimental::property::node::depends_on(Node1)}); // Check the graph structure @@ -607,26 +607,26 @@ TEST_F(CommandGraphTest, BarrierMultipleOOOQueue) { Graph.begin_recording({Queue, Queue2}); auto Node1 = Queue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto Node2 = Queue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto Node3 = Queue2.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto Node4 = Queue2.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto BarrierNode = Queue.ext_oneapi_submit_barrier(); auto Node5 = Queue2.submit([&](sycl::handler &cgh) { cgh.depends_on({Node3, Node4}); - cgh.single_task>([]() {}); + cgh.single_task([]() {}); }); auto Node6 = Queue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); Graph.end_recording(); @@ -685,15 +685,15 @@ TEST_F(CommandGraphTest, BarrierMultipleInOrderQueue) { Graph.begin_recording({InOrderQueue1, InOrderQueue2}); auto Node1 = InOrderQueue1.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto Node2 = InOrderQueue2.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto BarrierNode = InOrderQueue1.ext_oneapi_submit_barrier(); auto Node3 = InOrderQueue2.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); Graph.end_recording(); @@ -736,15 +736,15 @@ TEST_F(CommandGraphTest, BarrierMultipleMixedOrderQueues) { Graph.begin_recording({Queue, InOrderQueue}); auto Node1 = Queue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto Node2 = InOrderQueue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto BarrierNode = Queue.ext_oneapi_submit_barrier(); auto Node3 = InOrderQueue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); Graph.end_recording(); @@ -830,9 +830,9 @@ TEST_F(CommandGraphTest, BarrierWithInOrderCommands) { Graph.begin_recording({InOrderQueue1, InOrderQueue2}); auto Node1 = InOrderQueue1.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto Node2 = InOrderQueue2.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); Graph.end_recording(); Graph.begin_recording({InOrderQueue1, InOrderQueue2}); @@ -842,9 +842,9 @@ TEST_F(CommandGraphTest, BarrierWithInOrderCommands) { Graph.begin_recording({InOrderQueue1, InOrderQueue2}); auto Node3 = InOrderQueue1.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto Node4 = InOrderQueue2.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); Graph.end_recording(); Graph.begin_recording({InOrderQueue1, InOrderQueue2}); @@ -854,9 +854,9 @@ TEST_F(CommandGraphTest, BarrierWithInOrderCommands) { Graph.begin_recording({InOrderQueue1, InOrderQueue2}); auto Node5 = InOrderQueue1.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto Node6 = InOrderQueue2.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); Graph.end_recording(); Graph.begin_recording({InOrderQueue1, InOrderQueue2}); diff --git a/sycl/unittests/Extensions/CommandGraph/CommandGraph.cpp b/sycl/unittests/Extensions/CommandGraph/CommandGraph.cpp index fe4731dc31535..3d92ec52c4f06 100644 --- a/sycl/unittests/Extensions/CommandGraph/CommandGraph.cpp +++ b/sycl/unittests/Extensions/CommandGraph/CommandGraph.cpp @@ -56,7 +56,7 @@ TEST_F(CommandGraphTest, AddNode) { ASSERT_TRUE(GraphImpl->MRoots.empty()); auto Node1 = Graph.add( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); ASSERT_NE(sycl::detail::getSyclObjImpl(Node1), nullptr); ASSERT_FALSE(sycl::detail::getSyclObjImpl(Node1)->isEmpty()); ASSERT_EQ(GraphImpl->MRoots.size(), 1lu); @@ -112,17 +112,17 @@ TEST_F(CommandGraphTest, Finalize) { sycl::buffer Buf(1); auto Node1 = Graph.add([&](sycl::handler &cgh) { sycl::accessor A(Buf, cgh, sycl::write_only, sycl::no_init); - cgh.single_task>([]() {}); + cgh.single_task([]() {}); }); // Add independent node auto Node2 = Graph.add( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); // Add a node that depends on Node1 due to the accessor auto Node3 = Graph.add([&](sycl::handler &cgh) { sycl::accessor A(Buf, cgh, sycl::read_write); - cgh.single_task>([]() {}); + cgh.single_task([]() {}); }); // Guarantee order of independent nodes 1 and 2 @@ -148,7 +148,7 @@ TEST_F(CommandGraphTest, MakeEdge) { // Add two independent nodes auto Node1 = Graph.add( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto Node2 = Graph.add([&](sycl::handler &cgh) {}); ASSERT_EQ(GraphImpl->MRoots.size(), 2ul); ASSERT_TRUE(sycl::detail::getSyclObjImpl(Node1)->MSuccessors.empty()); @@ -201,7 +201,7 @@ TEST_F(CommandGraphTest, BeginEndRecording) { TEST_F(CommandGraphTest, GetCGCopy) { auto Node1 = Graph.add([&](sycl::handler &cgh) {}); auto Node2 = Graph.add( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }, + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }, {experimental::property::node::depends_on(Node1)}); // Get copy of CG of Node2 and check equality @@ -223,11 +223,11 @@ TEST_F(CommandGraphTest, GetCGCopy) { TEST_F(CommandGraphTest, DependencyLeavesKeyword1) { auto Node1Graph = Graph.add( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto Node2Graph = Graph.add( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto Node3Graph = Graph.add( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto EmptyNode = Graph.add([&](sycl::handler &cgh) { /*empty node */ }, @@ -258,13 +258,13 @@ TEST_F(CommandGraphTest, DependencyLeavesKeyword1) { TEST_F(CommandGraphTest, DependencyLeavesKeyword2) { auto Node1Graph = Graph.add( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto Node2Graph = Graph.add( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto Node3Graph = Graph.add( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto Node4Graph = Graph.add( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }, + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }, {experimental::property::node::depends_on(Node3Graph)}); auto EmptyNode = @@ -301,17 +301,17 @@ TEST_F(CommandGraphTest, DependencyLeavesKeyword2) { TEST_F(CommandGraphTest, DependencyLeavesKeyword3) { auto Node1Graph = Graph.add( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto Node2Graph = Graph.add( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto EmptyNode = Graph.add([&](sycl::handler &cgh) { /*empty node */ }, {experimental::property::node::depends_on_all_leaves()}); auto Node3Graph = Graph.add( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }, + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }, {experimental::property::node::depends_on(Node1Graph)}); auto Node4Graph = Graph.add( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }, + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }, {experimental::property::node::depends_on(EmptyNode)}); auto GraphImpl = sycl::detail::getSyclObjImpl(Graph); @@ -344,14 +344,14 @@ TEST_F(CommandGraphTest, DependencyLeavesKeyword3) { TEST_F(CommandGraphTest, DependencyLeavesKeyword4) { auto Node1Graph = Graph.add( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto Node2Graph = Graph.add( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto EmptyNode = Graph.add([&](sycl::handler &cgh) { /*empty node */ }, {experimental::property::node::depends_on_all_leaves()}); auto Node3Graph = Graph.add( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto EmptyNode2 = Graph.add([&](sycl::handler &cgh) { /*empty node */ }, {experimental::property::node::depends_on_all_leaves()}); @@ -389,25 +389,25 @@ TEST_F(CommandGraphTest, GraphPartitionsMerging) { // Tests that the parition merging algo works as expected in case of backward // dependencies auto NodeA = Graph.add( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto NodeB = Graph.add( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }, + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }, {experimental::property::node::depends_on(NodeA)}); auto NodeHT1 = Graph.add([&](sycl::handler &cgh) { cgh.host_task([=]() {}); }, {experimental::property::node::depends_on(NodeB)}); auto NodeC = Graph.add( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }, + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }, {experimental::property::node::depends_on(NodeHT1)}); auto NodeD = Graph.add( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }, + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }, {experimental::property::node::depends_on(NodeB)}); auto NodeHT2 = Graph.add([&](sycl::handler &cgh) { cgh.host_task([=]() {}); }, {experimental::property::node::depends_on(NodeD)}); auto NodeE = Graph.add( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }, + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }, {experimental::property::node::depends_on(NodeHT2)}); auto NodeF = Graph.add( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }, + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }, {experimental::property::node::depends_on(NodeHT2)}); // Backward dependency @@ -429,7 +429,7 @@ TEST_F(CommandGraphTest, GetNodeFromEvent) { // for an explicit node Graph.begin_recording(Queue); auto EventKernel = Queue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); Graph.end_recording(); experimental::node NodeKernelA = @@ -437,12 +437,12 @@ TEST_F(CommandGraphTest, GetNodeFromEvent) { // Add node as a dependency with the property auto NodeKernelB = Graph.add( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }, + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }, experimental::property::node::depends_on(NodeKernelA)); // Test adding a dependency through make_edge auto NodeKernelC = Graph.add( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); ASSERT_NO_THROW(Graph.make_edge(NodeKernelA, NodeKernelC)); auto GraphExec = Graph.finalize(); @@ -527,12 +527,12 @@ void testAccessorModeCombo(sycl::queue Queue) { // Create the first node with a write mode auto EventFirst = Queue.submit([&](handler &CGH) { auto Acc = Buffer.get_access(CGH); - CGH.single_task>([]() {}); + CGH.single_task([]() {}); }); auto EventSecond = Queue.submit([&](handler &CGH) { auto Acc = Buffer.get_access(CGH); - CGH.single_task>([]() {}); + CGH.single_task([]() {}); }); Graph.end_recording(Queue); diff --git a/sycl/unittests/Extensions/CommandGraph/Exceptions.cpp b/sycl/unittests/Extensions/CommandGraph/Exceptions.cpp index f52e690b171e8..6ff9bcea22306 100644 --- a/sycl/unittests/Extensions/CommandGraph/Exceptions.cpp +++ b/sycl/unittests/Extensions/CommandGraph/Exceptions.cpp @@ -281,11 +281,11 @@ TEST_F(CommandGraphTest, ExplicitBarrierDependencyException) { Graph2.begin_recording({Queue}); auto Node = Queue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); Graph2.end_recording(); auto Event = Queue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); Graph.begin_recording(Queue); @@ -440,9 +440,9 @@ TEST_F(CommandGraphTest, WorkGroupScratchMemoryCheck) { TEST_F(CommandGraphTest, MakeEdgeErrors) { // Set up some nodes in the graph auto NodeA = Graph.add( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto NodeB = Graph.add( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); // Test error on calling make_edge when a queue is recording to the graph Graph.begin_recording(Queue); @@ -475,7 +475,7 @@ TEST_F(CommandGraphTest, MakeEdgeErrors) { experimental::command_graph GraphOther{ Queue.get_context(), Queue.get_device()}; auto NodeOther = GraphOther.add( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); ASSERT_THROW( { @@ -598,9 +598,9 @@ TEST_F(CommandGraphTest, InvalidHostAccessor) { TEST_F(CommandGraphTest, ProfilingException) { Graph.begin_recording(Queue); auto Event1 = Queue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto Event2 = Queue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); Graph.end_recording(Queue); try { @@ -617,7 +617,7 @@ TEST_F(CommandGraphTest, ProfilingException) { TEST_F(CommandGraphTest, ProfilingExceptionProperty) { Graph.begin_recording(Queue); auto Event1 = Queue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); Graph.end_recording(Queue); // Checks exception thrown if profiling is requested while profiling has @@ -652,7 +652,7 @@ TEST_F(CommandGraphTest, ClusterLaunchException) { try { Graph.begin_recording(Queue); auto Event1 = Queue.submit([&](sycl::handler &cgh) { - cgh.parallel_for>(sycl::nd_range<1>({4096}, {32}), + cgh.parallel_for(sycl::nd_range<1>({4096}, {32}), cluster_launch_property, [&](sycl::nd_item<1> it) {}); }); @@ -751,7 +751,7 @@ TEST_F(CommandGraphTest, DynamicCommandGroupWrongGraph) { experimental::command_graph Graph1{Queue.get_context(), Queue.get_device()}; experimental::command_graph Graph2{Queue.get_context(), Queue.get_device()}; auto CGF = [&](sycl::handler &CGH) { - CGH.single_task>([]() {}); + CGH.single_task([]() {}); }; experimental::dynamic_command_group DynCG(Graph2, {CGF}); diff --git a/sycl/unittests/Extensions/CommandGraph/InOrderQueue.cpp b/sycl/unittests/Extensions/CommandGraph/InOrderQueue.cpp index cb234c687c249..195eaa0398ce8 100644 --- a/sycl/unittests/Extensions/CommandGraph/InOrderQueue.cpp +++ b/sycl/unittests/Extensions/CommandGraph/InOrderQueue.cpp @@ -20,7 +20,7 @@ TEST_F(CommandGraphTest, InOrderQueue) { // Record in-order queue with three nodes InOrderGraph.begin_recording(InOrderQueue); auto Node1Graph = InOrderQueue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto PtrNode1 = sycl::detail::getSyclObjImpl(InOrderGraph) ->getLastInorderNode( @@ -29,7 +29,7 @@ TEST_F(CommandGraphTest, InOrderQueue) { ASSERT_TRUE(PtrNode1->MPredecessors.empty()); auto Node2Graph = InOrderQueue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto PtrNode2 = sycl::detail::getSyclObjImpl(InOrderGraph) ->getLastInorderNode( @@ -42,7 +42,7 @@ TEST_F(CommandGraphTest, InOrderQueue) { ASSERT_EQ(PtrNode2->MPredecessors.front().lock(), PtrNode1); auto Node3Graph = InOrderQueue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto PtrNode3 = sycl::detail::getSyclObjImpl(InOrderGraph) ->getLastInorderNode( @@ -80,7 +80,7 @@ TEST_F(CommandGraphTest, InOrderQueueWithEmpty) { // node InOrderGraph.begin_recording(InOrderQueue); auto Node1Graph = InOrderQueue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto PtrNode1 = sycl::detail::getSyclObjImpl(InOrderGraph) ->getLastInorderNode( @@ -101,7 +101,7 @@ TEST_F(CommandGraphTest, InOrderQueueWithEmpty) { ASSERT_EQ(PtrNode2->MPredecessors.front().lock(), PtrNode1); auto Node3Graph = InOrderQueue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto PtrNode3 = sycl::detail::getSyclObjImpl(InOrderGraph) ->getLastInorderNode( @@ -148,7 +148,7 @@ TEST_F(CommandGraphTest, InOrderQueueWithEmptyFirst) { ASSERT_TRUE(PtrNode1->MPredecessors.empty()); auto Node2Graph = InOrderQueue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto PtrNode2 = sycl::detail::getSyclObjImpl(InOrderGraph) ->getLastInorderNode( @@ -161,7 +161,7 @@ TEST_F(CommandGraphTest, InOrderQueueWithEmptyFirst) { ASSERT_EQ(PtrNode2->MPredecessors.front().lock(), PtrNode1); auto Node3Graph = InOrderQueue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto PtrNode3 = sycl::detail::getSyclObjImpl(InOrderGraph) ->getLastInorderNode( @@ -200,7 +200,7 @@ TEST_F(CommandGraphTest, InOrderQueueWithEmptyLast) { // Record in-order queue with two regular nodes then an empty node InOrderGraph.begin_recording(InOrderQueue); auto Node1Graph = InOrderQueue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto PtrNode1 = sycl::detail::getSyclObjImpl(InOrderGraph) ->getLastInorderNode( @@ -209,7 +209,7 @@ TEST_F(CommandGraphTest, InOrderQueueWithEmptyLast) { ASSERT_TRUE(PtrNode1->MPredecessors.empty()); auto Node2Graph = InOrderQueue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto PtrNode2 = sycl::detail::getSyclObjImpl(InOrderGraph) ->getLastInorderNode( @@ -272,7 +272,7 @@ TEST_F(CommandGraphTest, InOrderQueueWithPreviousHostTask) { // Record in-order queue with three nodes. InOrderGraph.begin_recording(InOrderQueue); auto Node1Graph = InOrderQueue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto PtrNode1 = sycl::detail::getSyclObjImpl(InOrderGraph) ->getLastInorderNode( @@ -281,7 +281,7 @@ TEST_F(CommandGraphTest, InOrderQueueWithPreviousHostTask) { ASSERT_TRUE(PtrNode1->MPredecessors.empty()); auto Node2Graph = InOrderQueue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto PtrNode2 = sycl::detail::getSyclObjImpl(InOrderGraph) ->getLastInorderNode( @@ -294,7 +294,7 @@ TEST_F(CommandGraphTest, InOrderQueueWithPreviousHostTask) { ASSERT_EQ(PtrNode2->MPredecessors.front().lock(), PtrNode1); auto Node3Graph = InOrderQueue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto PtrNode3 = sycl::detail::getSyclObjImpl(InOrderGraph) ->getLastInorderNode( @@ -309,7 +309,7 @@ TEST_F(CommandGraphTest, InOrderQueueWithPreviousHostTask) { InOrderGraph.end_recording(InOrderQueue); auto EventLast = InOrderQueue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto EventLastImpl = sycl::detail::getSyclObjImpl(EventLast); auto WaitList = EventLastImpl->getWaitList(); @@ -343,7 +343,7 @@ TEST_F(CommandGraphTest, InOrderQueueHostTaskAndGraph) { // Record in-order queue with three nodes. InOrderGraph.begin_recording(InOrderQueue); auto Node1Graph = InOrderQueue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto PtrNode1 = sycl::detail::getSyclObjImpl(InOrderGraph) ->getLastInorderNode( @@ -352,7 +352,7 @@ TEST_F(CommandGraphTest, InOrderQueueHostTaskAndGraph) { ASSERT_TRUE(PtrNode1->MPredecessors.empty()); auto Node2Graph = InOrderQueue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto PtrNode2 = sycl::detail::getSyclObjImpl(InOrderGraph) ->getLastInorderNode( @@ -365,7 +365,7 @@ TEST_F(CommandGraphTest, InOrderQueueHostTaskAndGraph) { ASSERT_EQ(PtrNode2->MPredecessors.front().lock(), PtrNode1); auto Node3Graph = InOrderQueue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto PtrNode3 = sycl::detail::getSyclObjImpl(InOrderGraph) ->getLastInorderNode( @@ -388,7 +388,7 @@ TEST_F(CommandGraphTest, InOrderQueueHostTaskAndGraph) { auto EventGraphImpl = sycl::detail::getSyclObjImpl(EventGraph); auto EventLast = InOrderQueue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto EventLastImpl = sycl::detail::getSyclObjImpl(EventLast); auto EventLastWaitList = EventLastImpl->getWaitList(); // Previous task is not a host task. Explicit dependency is still needed @@ -425,7 +425,7 @@ TEST_F(CommandGraphTest, InOrderQueueMemsetAndGraph) { // Record in-order queue with three nodes. InOrderGraph.begin_recording(InOrderQueue); auto Node1Graph = InOrderQueue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto PtrNode1 = sycl::detail::getSyclObjImpl(InOrderGraph) ->getLastInorderNode( @@ -434,7 +434,7 @@ TEST_F(CommandGraphTest, InOrderQueueMemsetAndGraph) { ASSERT_TRUE(PtrNode1->MPredecessors.empty()); auto Node2Graph = InOrderQueue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto PtrNode2 = sycl::detail::getSyclObjImpl(InOrderGraph) ->getLastInorderNode( @@ -447,7 +447,7 @@ TEST_F(CommandGraphTest, InOrderQueueMemsetAndGraph) { ASSERT_EQ(PtrNode2->MPredecessors.front().lock(), PtrNode1); auto Node3Graph = InOrderQueue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto PtrNode3 = sycl::detail::getSyclObjImpl(InOrderGraph) ->getLastInorderNode( @@ -489,7 +489,7 @@ TEST_F(CommandGraphTest, InOrderQueueMemcpyAndGraph) { // Record in-order queue with three nodes. InOrderGraph.begin_recording(InOrderQueue); auto Node1Graph = InOrderQueue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto PtrNode1 = sycl::detail::getSyclObjImpl(InOrderGraph) ->getLastInorderNode( @@ -498,7 +498,7 @@ TEST_F(CommandGraphTest, InOrderQueueMemcpyAndGraph) { ASSERT_TRUE(PtrNode1->MPredecessors.empty()); auto Node2Graph = InOrderQueue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto PtrNode2 = sycl::detail::getSyclObjImpl(InOrderGraph) ->getLastInorderNode( @@ -511,7 +511,7 @@ TEST_F(CommandGraphTest, InOrderQueueMemcpyAndGraph) { ASSERT_EQ(PtrNode2->MPredecessors.front().lock(), PtrNode1); auto Node3Graph = InOrderQueue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto PtrNode3 = sycl::detail::getSyclObjImpl(InOrderGraph) ->getLastInorderNode( @@ -544,13 +544,13 @@ TEST_F(CommandGraphTest, InOrderQueueEventless) { // Record in-order queue with three nodes InOrderGraph.begin_recording(InOrderQueue); auto Node1Graph = InOrderQueue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto Node2Graph = InOrderQueue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto Node3Graph = InOrderQueue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); InOrderGraph.end_recording(InOrderQueue); @@ -591,13 +591,13 @@ TEST_F(CommandGraphTest, InOrderQueueRequestEvent) { // Record in-order queue with three nodes InOrderGraph.begin_recording(InOrderQueue); auto Node1Graph = InOrderQueue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto Node2Graph = InOrderQueue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto Node3Graph = InOrderQueue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); InOrderGraph.end_recording(InOrderQueue); @@ -645,13 +645,13 @@ TEST_F(CommandGraphTest, InOrderQueueEventlessWithDependency) { // Record in-order queue with three nodes InOrderGraph.begin_recording(InOrderQueue); auto Node1Graph = InOrderQueue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto Node2Graph = InOrderQueue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto Node3Graph = InOrderQueue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); InOrderGraph.end_recording(InOrderQueue); @@ -672,7 +672,7 @@ TEST_F(CommandGraphTest, InOrderQueueEventlessWithDependency) { sycl::event Event = sycl::ext::oneapi::experimental::submit_with_event( OtherQueue, [&](sycl::handler &CGH) { - sycl::ext::oneapi::experimental::single_task>( + sycl::ext::oneapi::experimental::single_task( CGH, [=]() {}); }); diff --git a/sycl/unittests/Extensions/CommandGraph/MultiThreaded.cpp b/sycl/unittests/Extensions/CommandGraph/MultiThreaded.cpp index bab7909310ee6..f1e1970397a5d 100644 --- a/sycl/unittests/Extensions/CommandGraph/MultiThreaded.cpp +++ b/sycl/unittests/Extensions/CommandGraph/MultiThreaded.cpp @@ -31,18 +31,18 @@ namespace { /// @param Q Queue to submit nodes to. void runKernels(queue Q) { auto NodeA = Q.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto NodeB = Q.submit([&](sycl::handler &cgh) { cgh.depends_on(NodeA); - cgh.single_task>([]() {}); + cgh.single_task([]() {}); }); auto NodeC = Q.submit([&](sycl::handler &cgh) { cgh.depends_on(NodeA); - cgh.single_task>([]() {}); + cgh.single_task([]() {}); }); auto NodeD = Q.submit([&](sycl::handler &cgh) { cgh.depends_on({NodeB, NodeC}); - cgh.single_task>([]() {}); + cgh.single_task([]() {}); }); } @@ -50,13 +50,13 @@ void runKernels(queue Q) { /// @param Q Queue to submit nodes to. void runKernelsInOrder(queue Q) { auto NodeA = Q.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto NodeB = Q.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto NodeC = Q.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto NodeD = Q.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); } /// Adds four kernels with diamond dependency to the Graph G @@ -64,15 +64,15 @@ void runKernelsInOrder(queue Q) { void addKernels( experimental::command_graph G) { auto NodeA = G.add( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto NodeB = - G.add([&](sycl::handler &cgh) { cgh.single_task>([]() {}); }, + G.add([&](sycl::handler &cgh) { cgh.single_task([]() {}); }, {experimental::property::node::depends_on(NodeA)}); auto NodeC = - G.add([&](sycl::handler &cgh) { cgh.single_task>([]() {}); }, + G.add([&](sycl::handler &cgh) { cgh.single_task([]() {}); }, {experimental::property::node::depends_on(NodeA)}); auto NodeD = - G.add([&](sycl::handler &cgh) { cgh.single_task>([]() {}); }, + G.add([&](sycl::handler &cgh) { cgh.single_task([]() {}); }, {experimental::property::node::depends_on(NodeB, NodeC)}); } diff --git a/sycl/unittests/Extensions/CommandGraph/Properties.cpp b/sycl/unittests/Extensions/CommandGraph/Properties.cpp index a896e4ba47314..16a70a8aee365 100644 --- a/sycl/unittests/Extensions/CommandGraph/Properties.cpp +++ b/sycl/unittests/Extensions/CommandGraph/Properties.cpp @@ -21,7 +21,7 @@ class UnknownProperty : public ::sycl::detail::DataLessProperty< TEST_F(CommandGraphTest, PropertiesCheckInvalidNode) { try { auto Node1 = Graph.add( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }, + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }, UnknownProperty{}); } catch (sycl::exception &e) { EXPECT_EQ(e.code(), sycl::errc::invalid); diff --git a/sycl/unittests/Extensions/CommandGraph/Queries.cpp b/sycl/unittests/Extensions/CommandGraph/Queries.cpp index 35057a19b86b7..51568f02745fd 100644 --- a/sycl/unittests/Extensions/CommandGraph/Queries.cpp +++ b/sycl/unittests/Extensions/CommandGraph/Queries.cpp @@ -30,7 +30,7 @@ TEST_F(CommandGraphTest, GetNodeQueries) { // Add some nodes to the graph for testing and test after each addition. auto RootA = Graph.add( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); { auto GraphRoots = Graph.get_root_nodes(); auto GraphNodes = Graph.get_nodes(); @@ -38,7 +38,7 @@ TEST_F(CommandGraphTest, GetNodeQueries) { ASSERT_EQ(GraphNodes.size(), 1lu); } auto RootB = Graph.add( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); { auto GraphRoots = Graph.get_root_nodes(); auto GraphNodes = Graph.get_nodes(); @@ -46,7 +46,7 @@ TEST_F(CommandGraphTest, GetNodeQueries) { ASSERT_EQ(GraphNodes.size(), 2lu); } auto NodeA = Graph.add( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }, + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }, {experimental::property::node::depends_on(RootA, RootB)}); { auto GraphRoots = Graph.get_root_nodes(); @@ -55,7 +55,7 @@ TEST_F(CommandGraphTest, GetNodeQueries) { ASSERT_EQ(GraphNodes.size(), 3lu); } auto NodeB = Graph.add( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }, + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }, {experimental::property::node::depends_on(RootB)}); { auto GraphRoots = Graph.get_root_nodes(); @@ -64,7 +64,7 @@ TEST_F(CommandGraphTest, GetNodeQueries) { ASSERT_EQ(GraphNodes.size(), 4lu); } auto RootC = Graph.add( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); { auto GraphRoots = Graph.get_root_nodes(); auto GraphNodes = Graph.get_nodes(); @@ -101,7 +101,7 @@ TEST_F(CommandGraphTest, NodeTypeQueries) { int *PtrB = malloc_device(16, Queue); auto NodeKernel = Graph.add( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); ASSERT_EQ(NodeKernel.get_type(), experimental::node_type::kernel); auto NodeMemcpy = Graph.add( diff --git a/sycl/unittests/Extensions/CommandGraph/Regressions.cpp b/sycl/unittests/Extensions/CommandGraph/Regressions.cpp index a1428b597b6d4..0833357f14357 100644 --- a/sycl/unittests/Extensions/CommandGraph/Regressions.cpp +++ b/sycl/unittests/Extensions/CommandGraph/Regressions.cpp @@ -39,19 +39,19 @@ TEST_F(CommandGraphTest, AccessorModeRegression) { auto AccA = BufferA.get_access(CGH); auto AccB = BufferB.get_access(CGH); auto AccC = BufferC.get_access(CGH); - CGH.single_task>([]() {}); + CGH.single_task([]() {}); }); auto EventB = Queue.submit([&](handler &CGH) { auto AccA = BufferA.get_access(CGH); auto AccB = BufferB.get_access(CGH); auto AccD = BufferD.get_access(CGH); - CGH.single_task>([]() {}); + CGH.single_task([]() {}); }); auto EventC = Queue.submit([&](handler &CGH) { auto AccA = BufferA.get_access(CGH); auto AccB = BufferB.get_access(CGH); auto AccE = BufferE.get_access(CGH); - CGH.single_task>([]() {}); + CGH.single_task([]() {}); }); ScopedGraph.end_recording(Queue); @@ -74,7 +74,7 @@ TEST_F(CommandGraphTest, QueueRecordBarrierMultipleGraph) { Graph.begin_recording(Queue); auto NodeKernel = Queue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); Queue.ext_oneapi_submit_barrier({NodeKernel}); Graph.end_recording(Queue); @@ -82,7 +82,7 @@ TEST_F(CommandGraphTest, QueueRecordBarrierMultipleGraph) { Queue}; GraphB.begin_recording(Queue); auto NodeKernelB = Queue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); Queue.ext_oneapi_submit_barrier({NodeKernelB}); GraphB.end_recording(Queue); @@ -90,7 +90,7 @@ TEST_F(CommandGraphTest, QueueRecordBarrierMultipleGraph) { Queue}; GraphC.begin_recording(Queue); auto NodeKernelC = Queue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); Queue.ext_oneapi_submit_barrier(); GraphC.end_recording(Queue); } diff --git a/sycl/unittests/Extensions/CommandGraph/Subgraph.cpp b/sycl/unittests/Extensions/CommandGraph/Subgraph.cpp index e41db02b496ef..85ecbd5a19724 100644 --- a/sycl/unittests/Extensions/CommandGraph/Subgraph.cpp +++ b/sycl/unittests/Extensions/CommandGraph/Subgraph.cpp @@ -14,21 +14,21 @@ using namespace sycl::ext::oneapi; TEST_F(CommandGraphTest, SubGraph) { // Add sub-graph with two nodes auto Node1Graph = Graph.add( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto Node2Graph = Graph.add( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }, + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }, {experimental::property::node::depends_on(Node1Graph)}); auto GraphExec = Graph.finalize(); // Add node to main graph followed by sub-graph and another node experimental::command_graph MainGraph(Queue.get_context(), Dev); auto Node1MainGraph = MainGraph.add( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto Node2MainGraph = MainGraph.add([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExec); }, {experimental::property::node::depends_on(Node1MainGraph)}); auto Node3MainGraph = MainGraph.add( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }, + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }, {experimental::property::node::depends_on(Node2MainGraph)}); // Assert order of the added sub-graph @@ -79,12 +79,12 @@ TEST_F(CommandGraphTest, SubGraph) { TEST_F(CommandGraphTest, SubGraphWithEmptyNode) { // Add sub-graph with two nodes auto Node1Graph = Graph.add( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto Empty1Graph = Graph.add([&](sycl::handler &cgh) { /*empty node */ }, {experimental::property::node::depends_on(Node1Graph)}); auto Node2Graph = Graph.add( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }, + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }, {experimental::property::node::depends_on(Empty1Graph)}); auto GraphExec = Graph.finalize(); @@ -92,12 +92,12 @@ TEST_F(CommandGraphTest, SubGraphWithEmptyNode) { // Add node to main graph followed by sub-graph and another node experimental::command_graph MainGraph(Queue.get_context(), Dev); auto Node1MainGraph = MainGraph.add( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto Node2MainGraph = MainGraph.add([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExec); }, {experimental::property::node::depends_on(Node1MainGraph)}); auto Node3MainGraph = MainGraph.add( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }, + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }, {experimental::property::node::depends_on(Node2MainGraph)}); // Assert order of the added sub-graph @@ -154,9 +154,9 @@ TEST_F(CommandGraphTest, SubGraphWithEmptyNode) { TEST_F(CommandGraphTest, SubGraphWithEmptyNodeLast) { // Add sub-graph with two nodes auto Node1Graph = Graph.add( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto Node2Graph = Graph.add( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }, + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }, {experimental::property::node::depends_on(Node1Graph)}); auto EmptyGraph = Graph.add([&](sycl::handler &cgh) { /*empty node */ }, @@ -167,12 +167,12 @@ TEST_F(CommandGraphTest, SubGraphWithEmptyNodeLast) { // Add node to main graph followed by sub-graph and another node experimental::command_graph MainGraph(Queue.get_context(), Dev); auto Node1MainGraph = MainGraph.add( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto Node2MainGraph = MainGraph.add([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExec); }, {experimental::property::node::depends_on(Node1MainGraph)}); auto Node3MainGraph = MainGraph.add( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }, + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }, {experimental::property::node::depends_on(Node2MainGraph)}); // Assert order of the added sub-graph @@ -230,10 +230,10 @@ TEST_F(CommandGraphTest, RecordSubGraph) { // Record sub-graph with two nodes Graph.begin_recording(Queue); auto Node1Graph = Queue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto Node2Graph = Queue.submit([&](sycl::handler &cgh) { cgh.depends_on(Node1Graph); - cgh.single_task>([]() {}); + cgh.single_task([]() {}); }); Graph.end_recording(Queue); auto GraphExec = Graph.finalize(); @@ -242,14 +242,14 @@ TEST_F(CommandGraphTest, RecordSubGraph) { experimental::command_graph MainGraph(Queue.get_context(), Dev); MainGraph.begin_recording(Queue); auto Node1MainGraph = Queue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto Node2MainGraph = Queue.submit([&](handler &cgh) { cgh.depends_on(Node1MainGraph); cgh.ext_oneapi_graph(GraphExec); }); auto Node3MainGraph = Queue.submit([&](sycl::handler &cgh) { cgh.depends_on(Node2MainGraph); - cgh.single_task>([]() {}); + cgh.single_task([]() {}); }); MainGraph.end_recording(Queue); diff --git a/sycl/unittests/Extensions/CommandGraph/TopologicalSort.cpp b/sycl/unittests/Extensions/CommandGraph/TopologicalSort.cpp index f78fd0ff50e9c..47583c1708b55 100644 --- a/sycl/unittests/Extensions/CommandGraph/TopologicalSort.cpp +++ b/sycl/unittests/Extensions/CommandGraph/TopologicalSort.cpp @@ -29,19 +29,19 @@ TEST_F(CommandGraphTest, CheckTopologicalSort) { size_t NumNodes = 7; experimental::command_graph Graph{Queue.get_context(), Dev}; auto Node6 = Graph.add( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto Node3 = Graph.add( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto Node0 = Graph.add( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto Node1 = Graph.add( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto Node2 = Graph.add( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto Node5 = Graph.add( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto Node4 = Graph.add( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); Graph.make_edge(Node6, Node3); Graph.make_edge(Node6, Node0); diff --git a/sycl/unittests/Extensions/CommandGraph/Update.cpp b/sycl/unittests/Extensions/CommandGraph/Update.cpp index 1f38e08eeb559..a6eff48af35f7 100644 --- a/sycl/unittests/Extensions/CommandGraph/Update.cpp +++ b/sycl/unittests/Extensions/CommandGraph/Update.cpp @@ -13,7 +13,7 @@ using namespace sycl::ext::oneapi; TEST_F(CommandGraphTest, UpdatableException) { auto Node = Graph.add( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto ExecGraphUpdatable = Graph.finalize(experimental::property::graph::updatable{}); @@ -36,7 +36,7 @@ TEST_F(CommandGraphTest, DynamicObjRegister) { auto Node = OtherGraph.add([&](sycl::handler &cgh) { // This should not throw EXPECT_NO_THROW(cgh.set_arg(0, DynObj)); - cgh.single_task>([]() {}); + cgh.single_task([]() {}); }); }; @@ -59,7 +59,7 @@ TEST_F(CommandGraphTest, UpdateNodeNotInGraph) { auto OtherGraph = experimental::command_graph(Queue.get_context(), Queue.get_device()); auto OtherNode = OtherGraph.add( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto ExecGraph = Graph.finalize(experimental::property::graph::updatable{}); EXPECT_ANY_THROW(ExecGraph.update(OtherNode)); @@ -70,7 +70,7 @@ TEST_F(CommandGraphTest, UpdateWithUnchangedNode) { // parameters is not an error auto Node = Graph.add( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto ExecGraph = Graph.finalize(experimental::property::graph::updatable{}); EXPECT_NO_THROW(ExecGraph.update(Node)); @@ -87,7 +87,7 @@ TEST_F(CommandGraphTest, UpdateNodeTypeExceptions) { ASSERT_NO_THROW(auto NodeKernel = Graph.add([&](sycl::handler &cgh) { cgh.set_arg(0, DynObj); - cgh.single_task>([]() {}); + cgh.single_task([]() {}); })); ASSERT_ANY_THROW(auto NodeMemcpy = Graph.add([&](sycl::handler &cgh) { @@ -161,7 +161,7 @@ TEST_F(CommandGraphTest, UpdateRangeErrors) { nd_range<1> NDRange{range{128}, range{32}}; range<1> Range{128}; auto NodeNDRange = Graph.add([&](sycl::handler &cgh) { - cgh.parallel_for>(NDRange, [](nd_item<1>) {}); + cgh.parallel_for(NDRange, [](nd_item<1>) {}); }); // OK @@ -174,7 +174,7 @@ TEST_F(CommandGraphTest, UpdateRangeErrors) { EXPECT_ANY_THROW(NodeNDRange.update_range(range<3>{32, 32, 1})); auto NodeRange = Graph.add([&](sycl::handler &cgh) { - cgh.parallel_for>(range<1>{128}, [](item<1>) {}); + cgh.parallel_for(range<1>{128}, [](item<1>) {}); }); // OK @@ -202,7 +202,7 @@ class WholeGraphUpdateTest : public CommandGraphTest { UpdateGraph; std::function EmptyKernel = [&](handler &CGH) { - CGH.parallel_for>(range<1>(Size), [=](item<1> Id) {}); + CGH.parallel_for(range<1>(Size), [=](item<1> Id) {}); }; }; @@ -441,7 +441,7 @@ TEST_F(CommandGraphTest, CheckFinalizeBehavior) { // Check that both finalize with and without updatable property work as // expected auto Node = Graph.add( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); mock::getCallbacks().set_after_callback( "urCommandBufferGetInfoExp", &redefinedCommandBufferGetInfoExpAfter); mock::getCallbacks().set_after_callback( diff --git a/sycl/unittests/Extensions/EnqueueFunctionsEvents.cpp b/sycl/unittests/Extensions/EnqueueFunctionsEvents.cpp index e170628b83a7c..8de4e4082987f 100644 --- a/sycl/unittests/Extensions/EnqueueFunctionsEvents.cpp +++ b/sycl/unittests/Extensions/EnqueueFunctionsEvents.cpp @@ -43,7 +43,7 @@ TEST_F(EnqueueFunctionsEventsTests, SubmitSingleTaskNoEvent) { &redefined_urEnqueueKernelLaunch); oneapiext::submit(Q, [&](handler &CGH) { - oneapiext::single_task>(CGH, []() {}); + oneapiext::single_task(CGH, []() {}); }); ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); @@ -53,7 +53,7 @@ TEST_F(EnqueueFunctionsEventsTests, SingleTaskShortcutNoEvent) { mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", &redefined_urEnqueueKernelLaunch); - oneapiext::single_task>(Q, []() {}); + oneapiext::single_task(Q, []() {}); ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); } @@ -64,7 +64,7 @@ TEST_F(EnqueueFunctionsEventsTests, SubmitSingleTaskKernelNoEvent) { mock::getCallbacks().set_after_callback("urKernelGetInfo", &after_urKernelGetInfo); - auto KID = get_kernel_id>(); + auto KID = get_kernel_id(); auto KB = get_kernel_bundle( Q.get_context(), std::vector{KID}); @@ -83,7 +83,7 @@ TEST_F(EnqueueFunctionsEventsTests, SingleTaskShortcutKernelNoEvent) { mock::getCallbacks().set_after_callback("urKernelGetInfo", &after_urKernelGetInfo); - auto KID = get_kernel_id>(); + auto KID = get_kernel_id(); auto KB = get_kernel_bundle( Q.get_context(), std::vector{KID}); @@ -101,7 +101,7 @@ TEST_F(EnqueueFunctionsEventsTests, SubmitRangeParallelForNoEvent) { &redefined_urEnqueueKernelLaunch); oneapiext::submit(Q, [&](handler &CGH) { - oneapiext::parallel_for>(CGH, range<1>{32}, [](item<1>) {}); + oneapiext::parallel_for(CGH, range<1>{32}, [](item<1>) {}); }); ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); @@ -111,7 +111,7 @@ TEST_F(EnqueueFunctionsEventsTests, RangeParallelForShortcutNoEvent) { mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", &redefined_urEnqueueKernelLaunch); - oneapiext::parallel_for>(Q, range<1>{32}, [](item<1>) {}); + oneapiext::parallel_for(Q, range<1>{32}, [](item<1>) {}); ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); } @@ -122,7 +122,7 @@ TEST_F(EnqueueFunctionsEventsTests, SubmitRangeParallelForKernelNoEvent) { mock::getCallbacks().set_after_callback("urKernelGetInfo", &after_urKernelGetInfo); - auto KID = get_kernel_id>(); + auto KID = get_kernel_id(); auto KB = get_kernel_bundle( Q.get_context(), std::vector{KID}); @@ -142,7 +142,7 @@ TEST_F(EnqueueFunctionsEventsTests, RangeParallelForShortcutKernelNoEvent) { mock::getCallbacks().set_after_callback("urKernelGetInfo", &after_urKernelGetInfo); - auto KID = get_kernel_id>(); + auto KID = get_kernel_id(); auto KB = get_kernel_bundle( Q.get_context(), std::vector{KID}); @@ -160,7 +160,7 @@ TEST_F(EnqueueFunctionsEventsTests, SubmitNDLaunchNoEvent) { &redefined_urEnqueueKernelLaunch); oneapiext::submit(Q, [&](handler &CGH) { - oneapiext::nd_launch>( + oneapiext::nd_launch( CGH, nd_range<1>{range<1>{32}, range<1>{32}}, [](nd_item<1>) {}); }); @@ -171,7 +171,7 @@ TEST_F(EnqueueFunctionsEventsTests, NDLaunchShortcutNoEvent) { mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", &redefined_urEnqueueKernelLaunch); - oneapiext::nd_launch>(Q, nd_range<1>{range<1>{32}, range<1>{32}}, + oneapiext::nd_launch(Q, nd_range<1>{range<1>{32}, range<1>{32}}, [](nd_item<1>) {}); ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); @@ -183,7 +183,7 @@ TEST_F(EnqueueFunctionsEventsTests, SubmitNDLaunchKernelNoEvent) { mock::getCallbacks().set_after_callback("urKernelGetInfo", &after_urKernelGetInfo); - auto KID = get_kernel_id>(); + auto KID = get_kernel_id(); auto KB = get_kernel_bundle( Q.get_context(), std::vector{KID}); @@ -203,7 +203,7 @@ TEST_F(EnqueueFunctionsEventsTests, NDLaunchShortcutKernelNoEvent) { mock::getCallbacks().set_after_callback("urKernelGetInfo", &after_urKernelGetInfo); - auto KID = get_kernel_id>(); + auto KID = get_kernel_id(); auto KB = get_kernel_bundle( Q.get_context(), std::vector{KID}); @@ -380,7 +380,7 @@ TEST_F(EnqueueFunctionsEventsTests, BarrierBeforeHostTask) { mock::getCallbacks().set_after_callback( "urEnqueueEventsWaitWithBarrier", &after_urEnqueueEventsWaitWithBarrier); - oneapiext::single_task>(Q, []() {}); + oneapiext::single_task(Q, []() {}); std::chrono::time_point HostTaskTimestamp; Q.submit([&](handler &CGH) { diff --git a/sycl/unittests/Extensions/FreeFunctionCommands/Barrier.cpp b/sycl/unittests/Extensions/FreeFunctionCommands/Barrier.cpp index f5dc107068f77..5273bb98ea70f 100644 --- a/sycl/unittests/Extensions/FreeFunctionCommands/Barrier.cpp +++ b/sycl/unittests/Extensions/FreeFunctionCommands/Barrier.cpp @@ -54,8 +54,8 @@ TEST(BarrierTest, EventBarrierShortcut) { sycl::queue Queue1; sycl::queue Queue2; - sycl::event Event1 = Queue1.single_task>([]() {}); - sycl::event Event2 = Queue2.single_task>([]() {}); + sycl::event Event1 = Queue1.single_task([]() {}); + sycl::event Event2 = Queue2.single_task([]() {}); sycl::khr::event_barrier(Queue2, {Event1, Event2}); @@ -73,8 +73,8 @@ TEST(BarrierTest, EventBarrier) { sycl::queue Queue1; sycl::queue Queue2; - sycl::event Event1 = Queue1.single_task>([]() {}); - sycl::event Event2 = Queue2.single_task>([]() {}); + sycl::event Event1 = Queue1.single_task([]() {}); + sycl::event Event2 = Queue2.single_task([]() {}); sycl::khr::submit(Queue2, [&](sycl::handler &Handler) { sycl::khr::event_barrier(Handler, {Event1, Event2}); diff --git a/sycl/unittests/Extensions/GetLastEvent.cpp b/sycl/unittests/Extensions/GetLastEvent.cpp index 82b3e7a4756bd..889a8f9da036b 100644 --- a/sycl/unittests/Extensions/GetLastEvent.cpp +++ b/sycl/unittests/Extensions/GetLastEvent.cpp @@ -48,7 +48,7 @@ TEST(GetLastEventEmptyQueue, CheckEventlessWorkQueue) { // The following single_task does not return an event, so it is expected that // the last event query creates a new marker event. - sycl::ext::oneapi::experimental::single_task>(Q, []() {}); + sycl::ext::oneapi::experimental::single_task(Q, []() {}); std::optional E = Q.ext_oneapi_get_last_event(); ASSERT_TRUE(E.has_value()); ur_event_handle_t UREvent = detail::getSyclObjImpl(*E)->getHandle(); diff --git a/sycl/unittests/Extensions/KernelProperties.cpp b/sycl/unittests/Extensions/KernelProperties.cpp index 4206af94179c6..df14f2416998b 100644 --- a/sycl/unittests/Extensions/KernelProperties.cpp +++ b/sycl/unittests/Extensions/KernelProperties.cpp @@ -44,7 +44,7 @@ class KernelPropertiesTests : public ::testing::Test { inline sycl::kernel GetTestKernel() { auto KB = sycl::get_kernel_bundle( Q.get_context()); - return KB.get_kernel>(); + return KB.get_kernel(); } template void RunForwardProgressTest(const FuncT &F) { diff --git a/sycl/unittests/Extensions/KernelQueries/SpillMemorySize.cpp b/sycl/unittests/Extensions/KernelQueries/SpillMemorySize.cpp index 9ef918908d116..02636bc13d82c 100644 --- a/sycl/unittests/Extensions/KernelQueries/SpillMemorySize.cpp +++ b/sycl/unittests/Extensions/KernelQueries/SpillMemorySize.cpp @@ -46,7 +46,7 @@ class KernelQueriesTests : public ::testing::Test { inline sycl::kernel GetTestKernel() { auto KB = sycl::get_kernel_bundle( Queue.get_context()); - return KB.get_kernel>(); + return KB.get_kernel(); } protected: @@ -64,7 +64,7 @@ TEST(KernelQueriesBasicTests, NoAspect) { sycl::queue q{sycl::context(sycl::platform()), sycl::default_selector_v}; auto KB = sycl::get_kernel_bundle(q.get_context()); - auto kernel = KB.get_kernel>(); + auto kernel = KB.get_kernel(); const auto dev = q.get_device(); try { kernel.template get_info< diff --git a/sycl/unittests/Extensions/LaunchQueries.cpp b/sycl/unittests/Extensions/LaunchQueries.cpp index 7585454b18e4e..de2f4c7535fea 100644 --- a/sycl/unittests/Extensions/LaunchQueries.cpp +++ b/sycl/unittests/Extensions/LaunchQueries.cpp @@ -13,7 +13,7 @@ #include namespace syclex = sycl::ext::oneapi::experimental; -const auto KernelID = sycl::get_kernel_id>(); +const auto KernelID = sycl::get_kernel_id(); inline ur_result_t redefine_urKernelGetGroupInfo_Success(void *pParams) { auto params = reinterpret_cast(pParams); diff --git a/sycl/unittests/SYCL2020/GetNativeOpenCL.cpp b/sycl/unittests/SYCL2020/GetNativeOpenCL.cpp index db36d59fb0153..fc33d3fa7a168 100644 --- a/sycl/unittests/SYCL2020/GetNativeOpenCL.cpp +++ b/sycl/unittests/SYCL2020/GetNativeOpenCL.cpp @@ -111,8 +111,7 @@ TEST(GetNative, GetNativeHandle) { sycl::buffer Buffer(&Data[0], sycl::range<1>(1)); Queue.submit([&](sycl::handler &cgh) { auto Acc = Buffer.get_access(cgh); - constexpr size_t KS = sizeof(decltype(Acc)); - cgh.single_task>([=]() { (void)Acc; }); + cgh.single_task([=]() { (void)Acc; }); }); EXPECT_EQ(mockOpenCLNumContextRetains(), 0ul); diff --git a/sycl/unittests/buffer/BufferLocation.cpp b/sycl/unittests/buffer/BufferLocation.cpp index e0edc08add908..b469e24b6203c 100644 --- a/sycl/unittests/buffer/BufferLocation.cpp +++ b/sycl/unittests/buffer/BufferLocation.cpp @@ -130,8 +130,7 @@ TEST_F(BufferTest, BufferLocationOnly) { sycl::ext::oneapi::accessor_property_list< sycl::ext::intel::property::buffer_location::instance<2>>> Acc{Buf, cgh, sycl::read_write, PL}; - constexpr size_t KS = sizeof(decltype(Acc)); - cgh.single_task>([=]() { Acc[0] = 4; }); + cgh.single_task([=]() { Acc[0] = 4; }); }) .wait(); EXPECT_EQ(PassedLocation, (uint64_t)2); @@ -159,9 +158,7 @@ TEST_F(BufferTest, BufferLocationWithAnotherProp) { sycl::ext::oneapi::property::no_alias::instance, sycl::ext::intel::property::buffer_location::instance<5>>> Acc{Buf, cgh, sycl::write_only, PL}; - - constexpr size_t KS = sizeof(decltype(Acc)); - cgh.single_task>([=]() { Acc[0] = 4; }); + cgh.single_task([=]() { Acc[0] = 4; }); }) .wait(); EXPECT_EQ(PassedLocation, (uint64_t)5); @@ -217,8 +214,7 @@ TEST_F(BufferTest, WOBufferLocation) { sycl::access::placeholder::false_t, sycl::ext::oneapi::accessor_property_list<>> Acc{Buf, cgh, sycl::read_write}; - constexpr size_t KS = sizeof(decltype(Acc)); - cgh.single_task>([=]() { Acc[0] = 4; }); + cgh.single_task([=]() { Acc[0] = 4; }); }) .wait(); EXPECT_EQ(PassedLocation, DEFAULT_VALUE); diff --git a/sycl/unittests/buffer/MemChannel.cpp b/sycl/unittests/buffer/MemChannel.cpp index 9dc04491b3dff..71ba5a7b225fd 100644 --- a/sycl/unittests/buffer/MemChannel.cpp +++ b/sycl/unittests/buffer/MemChannel.cpp @@ -100,8 +100,7 @@ TEST_F(BufferMemChannelTest, MemChannelProp) { Q.submit([&](sycl::handler &CGH) { sycl::accessor Acc{Buf, CGH, sycl::read_write}; - constexpr size_t KS = sizeof(decltype(Acc)); - CGH.single_task>([=]() { Acc[0] = 4; }); + CGH.single_task([=]() { Acc[0] = 4; }); }).wait(); EXPECT_EQ(PassedChannel, (uint32_t)42); } diff --git a/sycl/unittests/buffer/SubbufferLargeSize.cpp b/sycl/unittests/buffer/SubbufferLargeSize.cpp index 71d7c67dc9354..f924084b93a32 100644 --- a/sycl/unittests/buffer/SubbufferLargeSize.cpp +++ b/sycl/unittests/buffer/SubbufferLargeSize.cpp @@ -70,7 +70,7 @@ TEST_F(LargeBufferSizeTest, MoreThan32bit) { Subbuffer1.get_access(cgh); auto SubbufferAcc2 = Subbuffer2.get_access(cgh); - cgh.single_task>([=]() {}); + cgh.single_task([=]() {}); }) .wait(); diff --git a/sycl/unittests/event/EventDestruction.cpp b/sycl/unittests/event/EventDestruction.cpp index d0f2b1336c28d..b54c8550381e7 100644 --- a/sycl/unittests/event/EventDestruction.cpp +++ b/sycl/unittests/event/EventDestruction.cpp @@ -51,10 +51,10 @@ TEST_F(EventDestructionTest, EventDestruction) { { sycl::event E0 = Queue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); E1 = Queue.submit([&](sycl::handler &cgh) { cgh.depends_on(E0); - cgh.single_task>([]() {}); + cgh.single_task([]() {}); }); E1.wait(); } @@ -67,7 +67,7 @@ TEST_F(EventDestructionTest, EventDestruction) { sycl::event E2 = Queue.submit([&](sycl::handler &cgh) { cgh.depends_on(E1); - cgh.single_task>([]() {}); + cgh.single_task([]() {}); }); E2.wait(); // Dependencies of E1 should be cleared here. It depends on E0. @@ -75,7 +75,7 @@ TEST_F(EventDestructionTest, EventDestruction) { sycl::event E3 = Queue.submit([&](sycl::handler &cgh) { cgh.depends_on({E1, E2}); - cgh.single_task>([]() {}); + cgh.single_task([]() {}); }); E3.wait(); // Dependency of E1 has already cleared. E2 depends on E1 that @@ -89,20 +89,20 @@ TEST_F(EventDestructionTest, EventDestruction) { sycl::buffer Buf(&data[0], sycl::range<1>(2)); Queue.submit([&](sycl::handler &cgh) { auto Acc = Buf.get_access(cgh); - cgh.single_task>([=]() {}); + cgh.single_task([=]() {}); }); Queue.submit([&](sycl::handler &cgh) { auto Acc = Buf.get_access(cgh); - cgh.single_task>([=]() {}); + cgh.single_task([=]() {}); }); sycl::event E1 = Queue.submit([&](sycl::handler &cgh) { auto Acc = Buf.get_access(cgh); - cgh.single_task>([=]() {}); + cgh.single_task([=]() {}); }); sycl::event E2 = Queue.submit([&](sycl::handler &cgh) { auto Acc = Buf.get_access(cgh); - cgh.single_task>([=]() {}); + cgh.single_task([=]() {}); }); E2.wait(); // Dependencies are deleted through one level of dependencies. When @@ -151,10 +151,10 @@ TEST_F(EventDestructionTest, GetWaitList) { { sycl::event E0 = Queue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); E1 = Queue.submit([&](sycl::handler &cgh) { cgh.depends_on(E0); - cgh.single_task>([]() {}); + cgh.single_task([]() {}); }); E1.wait(); auto wait_list = E1.get_wait_list(); @@ -168,13 +168,13 @@ TEST_F(EventDestructionTest, GetWaitList) { sycl::event E2 = Queue.submit([&](sycl::handler &cgh) { cgh.depends_on(E1); - cgh.single_task>([]() {}); + cgh.single_task([]() {}); }); E2.wait(); sycl::event E3 = Queue.submit([&](sycl::handler &cgh) { cgh.depends_on({E1, E2}); - cgh.single_task>([]() {}); + cgh.single_task([]() {}); }); E3.wait(); diff --git a/sycl/unittests/helpers/TestKernel.hpp b/sycl/unittests/helpers/TestKernel.hpp index 722a6e825a55f..afdc5c52780ad 100644 --- a/sycl/unittests/helpers/TestKernel.hpp +++ b/sycl/unittests/helpers/TestKernel.hpp @@ -11,21 +11,36 @@ #include "MockDeviceImage.hpp" #include "MockKernelInfo.hpp" -template class TestKernel; +class TestKernel; +class TestKernelWithAcc; + namespace sycl { inline namespace _V1 { namespace detail { -template -struct KernelInfo> +template <> +struct KernelInfo : public unittest::MockKernelInfoBase { static constexpr const char *getName() { return "TestKernel"; } - static constexpr int64_t getKernelSize() { return KernelSize; } + static constexpr int64_t getKernelSize() { return 1; } static constexpr const char *getFileName() { return "TestKernel.hpp"; } static constexpr const char *getFunctionName() { return "TestKernelFunctionName"; } - static constexpr unsigned getLineNumber() { return 13; } + static constexpr unsigned getLineNumber() { return 14; } + static constexpr unsigned getColumnNumber() { return 8; } +}; + +template <> +struct KernelInfo + : public unittest::MockKernelInfoBase { + static constexpr const char *getName() { return "TestKernelWithAcc"; } + static constexpr int64_t getKernelSize() { return sizeof(sycl::accessor); } + static constexpr const char *getFileName() { return "TestKernel.hpp"; } + static constexpr const char *getFunctionName() { + return "TestKernelWithAccFunctionName"; + } + static constexpr unsigned getLineNumber() { return 15; } static constexpr unsigned getColumnNumber() { return 8; } }; @@ -33,6 +48,6 @@ struct KernelInfo> } // namespace _V1 } // namespace sycl -static sycl::unittest::MockDeviceImage Img = - sycl::unittest::generateDefaultImage({"TestKernel"}); -static sycl::unittest::MockDeviceImageArray<1> ImgArray{&Img}; +static sycl::unittest::MockDeviceImage Imgs[] = {sycl::unittest::generateDefaultImage({"TestKernel"}), + sycl::unittest::generateDefaultImage({"TestKernelWithAcc"})}; +static sycl::unittest::MockDeviceImageArray<2> ImgArray{Imgs}; diff --git a/sycl/unittests/program_manager/SubDevices.cpp b/sycl/unittests/program_manager/SubDevices.cpp index 6153453eefc16..bcfa67d7f55ae 100644 --- a/sycl/unittests/program_manager/SubDevices.cpp +++ b/sycl/unittests/program_manager/SubDevices.cpp @@ -113,7 +113,7 @@ TEST(SubDevices, DISABLED_BuildProgramForSubdevices) { sycl::detail::createSyclObjFromImpl(subDev2)}}; // Create device binary description structures for getBuiltPIProgram API. - auto devBin = Img.convertToNativeType(); + auto devBin = Imgs[0].convertToNativeType(); sycl_device_binaries_struct devBinStruct{SYCL_DEVICE_BINARIES_VERSION, 1, &devBin, nullptr, nullptr}; sycl::detail::ProgramManager::getInstance().addImages(&devBinStruct); @@ -121,10 +121,10 @@ TEST(SubDevices, DISABLED_BuildProgramForSubdevices) { // Build program via getBuiltPIProgram API sycl::detail::ProgramManager::getInstance().getBuiltURProgram( *sycl::detail::getSyclObjImpl(Ctx), subDev1, - sycl::detail::KernelInfo>::getName()); + sycl::detail::KernelInfo::getName()); // This call should re-use built binary from the cache. If urProgramBuild is // called again, the test will fail as second call of redefinedProgramBuild sycl::detail::ProgramManager::getInstance().getBuiltURProgram( *sycl::detail::getSyclObjImpl(Ctx), subDev2, - sycl::detail::KernelInfo>::getName()); + sycl::detail::KernelInfo::getName()); } diff --git a/sycl/unittests/program_manager/itt_annotations.cpp b/sycl/unittests/program_manager/itt_annotations.cpp index c865882162f78..1a2db4a206ed1 100644 --- a/sycl/unittests/program_manager/itt_annotations.cpp +++ b/sycl/unittests/program_manager/itt_annotations.cpp @@ -56,7 +56,7 @@ TEST(ITTNotify, UseKernelBundle) { auto ExecBundle = sycl::build(KernelBundle); Queue.submit([&](sycl::handler &CGH) { CGH.use_kernel_bundle(ExecBundle); - CGH.single_task>([] {}); // Actual kernel does not matter + CGH.single_task([] {}); // Actual kernel does not matter }); EXPECT_EQ(HasITTEnabled, true); @@ -83,7 +83,7 @@ TEST(ITTNotify, VarNotSet) { auto ExecBundle = sycl::build(KernelBundle); Queue.submit([&](sycl::handler &CGH) { CGH.use_kernel_bundle(ExecBundle); - CGH.single_task>([] {}); // Actual kernel does not matter + CGH.single_task([] {}); // Actual kernel does not matter }); EXPECT_EQ(HasITTEnabled, false); diff --git a/sycl/unittests/queue/Barrier.cpp b/sycl/unittests/queue/Barrier.cpp index b02eec23a7917..94b126eee5cb4 100644 --- a/sycl/unittests/queue/Barrier.cpp +++ b/sycl/unittests/queue/Barrier.cpp @@ -29,9 +29,9 @@ TEST(Queue, HandlerBarrier) { sycl::queue Q; Q.submit( - [&](sycl::handler &cgh) { cgh.single_task>([=]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([=]() {}); }); Q.submit( - [&](sycl::handler &cgh) { cgh.single_task>([=]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([=]() {}); }); Q.submit([&](sycl::handler &cgh) { cgh.ext_oneapi_barrier(); }); @@ -48,9 +48,9 @@ TEST(Queue, ExtOneAPISubmitBarrier) { sycl::queue Q; Q.submit( - [&](sycl::handler &cgh) { cgh.single_task>([=]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([=]() {}); }); Q.submit( - [&](sycl::handler &cgh) { cgh.single_task>([=]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([=]() {}); }); Q.ext_oneapi_submit_barrier(); @@ -69,9 +69,9 @@ TEST(Queue, HandlerBarrierWithWaitList) { sycl::queue Q3; auto E1 = Q1.submit( - [&](sycl::handler &cgh) { cgh.single_task>([=]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([=]() {}); }); auto E2 = Q2.submit( - [&](sycl::handler &cgh) { cgh.single_task>([=]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([=]() {}); }); Q3.submit([&](sycl::handler &cgh) { cgh.ext_oneapi_barrier({E1, E2}); }); @@ -90,9 +90,9 @@ TEST(Queue, ExtOneAPISubmitBarrierWithWaitList) { sycl::queue Q3; auto E1 = Q1.submit( - [&](sycl::handler &cgh) { cgh.single_task>([=]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([=]() {}); }); auto E2 = Q2.submit( - [&](sycl::handler &cgh) { cgh.single_task>([=]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task([=]() {}); }); Q3.ext_oneapi_submit_barrier({E1, E2}); diff --git a/sycl/unittests/queue/GetProfilingInfo.cpp b/sycl/unittests/queue/GetProfilingInfo.cpp index b7551907e310d..cab6cd3179918 100644 --- a/sycl/unittests/queue/GetProfilingInfo.cpp +++ b/sycl/unittests/queue/GetProfilingInfo.cpp @@ -312,7 +312,7 @@ TEST(GetProfilingInfo, DeviceTimerCalled = false; event E = Queue.submit( - [&](handler &cgh) { cgh.single_task>([]() {}); }); + [&](handler &cgh) { cgh.single_task([]() {}); }); EXPECT_FALSE(DeviceTimerCalled); } @@ -340,7 +340,7 @@ TEST(GetProfilingInfo, check_command_submission_time_with_host_accessor) { event E = Queue.submit([&](handler &cgh) { accessor writeRes{Buf, cgh, read_write}; - cgh.single_task>([]() {}); + cgh.single_task([]() {}); }); EXPECT_TRUE(DeviceTimerCalled); diff --git a/sycl/unittests/scheduler/AccessorDefaultCtor.cpp b/sycl/unittests/scheduler/AccessorDefaultCtor.cpp index 4ced36fbf47e6..319e442eb6aee 100644 --- a/sycl/unittests/scheduler/AccessorDefaultCtor.cpp +++ b/sycl/unittests/scheduler/AccessorDefaultCtor.cpp @@ -27,8 +27,7 @@ TEST_F(SchedulerTest, AccDefaultCtorDoesntAffectDepGraph) { sycl::accessor B; - constexpr size_t KernelSize = sizeof(B); - MockCGH.single_task>([=]() { + MockCGH.single_task([=]() { int size = B.size(); (void)size; }); diff --git a/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp b/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp index 22ba0d3bd2cab..29025cd63cd96 100644 --- a/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp +++ b/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp @@ -73,7 +73,7 @@ class DependsOnTests : public ::testing::Test { QueueDevImpl->get_context()); auto ExecBundle = sycl::build(KernelBundle); MockCGH.use_kernel_bundle(ExecBundle); - MockCGH.single_task>([] {}); + MockCGH.single_task([] {}); } std::unique_ptr CmdGroup = MockCGH.finalize(); @@ -337,7 +337,7 @@ TEST_F(DependsOnTests, ShortcutFunctionWithWaitList) { auto SingleTaskEvent = Queue.submit([&](sycl::handler &cgh) { cgh.depends_on(HostTaskEvent); - cgh.single_task>([] {}); + cgh.single_task([] {}); }); std::shared_ptr SingleTaskEventImpl = detail::getSyclObjImpl(SingleTaskEvent); @@ -383,7 +383,7 @@ TEST_F(DependsOnTests, BarrierWithWaitList) { auto SingleTaskEvent = Queue.submit([&](sycl::handler &cgh) { cgh.depends_on(HostTaskEvent); - cgh.single_task>([] {}); + cgh.single_task([] {}); }); std::shared_ptr SingleTaskEventImpl = detail::getSyclObjImpl(SingleTaskEvent); diff --git a/sycl/unittests/scheduler/FailedCommands.cpp b/sycl/unittests/scheduler/FailedCommands.cpp index f297f5da7a7eb..8207ade72f0d0 100644 --- a/sycl/unittests/scheduler/FailedCommands.cpp +++ b/sycl/unittests/scheduler/FailedCommands.cpp @@ -61,7 +61,7 @@ void RunWithFailedCommandsAndCheck(bool SyncExceptionExpected, try { Queue.submit([&](sycl::handler &CGH) { Buf.get_access(CGH); - CGH.single_task>([]() {}); + CGH.single_task([]() {}); }); } catch (...) { ExceptionThrown = true; @@ -129,7 +129,7 @@ TEST(FailedCommandsTest, CheckUREventReleaseWithKernel) { { try { Queue.submit( - [&](sycl::handler &CGH) { CGH.single_task>([]() {}); }); + [&](sycl::handler &CGH) { CGH.single_task([]() {}); }); } catch (...) { } } diff --git a/sycl/unittests/scheduler/GraphCleanup.cpp b/sycl/unittests/scheduler/GraphCleanup.cpp index 4a69bf1cf89ff..58713a05bc46f 100644 --- a/sycl/unittests/scheduler/GraphCleanup.cpp +++ b/sycl/unittests/scheduler/GraphCleanup.cpp @@ -327,7 +327,7 @@ TEST_F(SchedulerTest, StreamBufferDeallocation) { MockCGH.use_kernel_bundle(ExecBundle); stream Stream{1, 1, MockCGH}; MockCGH.addStream(detail::getSyclObjImpl(Stream)); - MockCGH.single_task>([] {}); + MockCGH.single_task([] {}); std::unique_ptr CG = MockCGH.finalize(); EventImplPtr = MSPtr->addCG(std::move(CG), QueueImpl, /*EventNeeded=*/true); @@ -398,7 +398,7 @@ TEST_F(SchedulerTest, AuxiliaryResourcesDeallocation) { MockCGH.use_kernel_bundle(ExecBundle); MockCGH.addReduction(std::move(MockAuxResourcePtr)); MockCGH.addReduction(std::move(BufPtr)); - MockCGH.single_task>([] {}); + MockCGH.single_task([] {}); std::unique_ptr CG = MockCGH.finalize(); EventImplPtr = MSPtr->addCG(std::move(CG), QueueImpl, /*EventNeeded=*/true); diff --git a/sycl/unittests/scheduler/HostTaskAndBarrier.cpp b/sycl/unittests/scheduler/HostTaskAndBarrier.cpp index e5ab7b00a2a28..11886b8368424 100644 --- a/sycl/unittests/scheduler/HostTaskAndBarrier.cpp +++ b/sycl/unittests/scheduler/HostTaskAndBarrier.cpp @@ -65,7 +65,7 @@ class BarrierHandlingWithHostTask : public ::testing::Test { return QueueDevImpl->submit(sycl::detail::type_erased_cgfo_ty{L}, nullptr, {}, true); } else if (Type == TestCGType::KERNEL_TASK) { - auto L = [&](handler &CGH) { CGH.single_task>([] {}); }; + auto L = [&](handler &CGH) { CGH.single_task([] {}); }; return QueueDevImpl->submit(sycl::detail::type_erased_cgfo_ty{L}, nullptr, {}, true); } else // (Type == TestCGType::BARRIER) diff --git a/sycl/unittests/scheduler/InOrderQueueDeps.cpp b/sycl/unittests/scheduler/InOrderQueueDeps.cpp index e511ce3cba2e8..da306e5899632 100644 --- a/sycl/unittests/scheduler/InOrderQueueDeps.cpp +++ b/sycl/unittests/scheduler/InOrderQueueDeps.cpp @@ -97,7 +97,7 @@ ur_result_t redefinedEnqueueEventsWaitWithBarrierExt(void *pParams) { sycl::event submitKernel(sycl::queue &Q) { return Q.submit( - [&](handler &cgh) { cgh.single_task>([]() {}); }); + [&](handler &cgh) { cgh.single_task([]() {}); }); } TEST_F(SchedulerTest, InOrderQueueIsolatedDeps) { @@ -149,10 +149,10 @@ TEST_F(SchedulerTest, TwoInOrderQueuesOnSameContext) { property::queue::in_order()}; event EvFirst = InOrderQueueFirst.submit( - [&](sycl::handler &CGH) { CGH.single_task>([] {}); }); + [&](sycl::handler &CGH) { CGH.single_task([] {}); }); std::ignore = InOrderQueueSecond.submit([&](sycl::handler &CGH) { CGH.depends_on(EvFirst); - CGH.single_task>([] {}); + CGH.single_task([] {}); }); InOrderQueueFirst.wait(); @@ -175,10 +175,10 @@ TEST_F(SchedulerTest, InOrderQueueNoSchedulerPath) { queue InOrderQueue{Ctx, default_selector_v, property::queue::in_order()}; event EvFirst = InOrderQueue.submit( - [&](sycl::handler &CGH) { CGH.single_task>([] {}); }); + [&](sycl::handler &CGH) { CGH.single_task([] {}); }); std::ignore = InOrderQueue.submit([&](sycl::handler &CGH) { CGH.depends_on(EvFirst); - CGH.single_task>([] {}); + CGH.single_task([] {}); }); InOrderQueue.wait(); diff --git a/sycl/unittests/scheduler/InOrderQueueHostTaskDeps.cpp b/sycl/unittests/scheduler/InOrderQueueHostTaskDeps.cpp index 55020ddd5d963..31a2914e2c803 100644 --- a/sycl/unittests/scheduler/InOrderQueueHostTaskDeps.cpp +++ b/sycl/unittests/scheduler/InOrderQueueHostTaskDeps.cpp @@ -100,7 +100,7 @@ TEST_F(SchedulerTest, InOrderQueueCrossDeps) { event Ev2 = InOrderQueue.submit([&](sycl::handler &CGH) { CGH.use_kernel_bundle(ExecBundle); - CGH.single_task>([] {}); + CGH.single_task([] {}); }); { @@ -146,7 +146,7 @@ TEST_F(SchedulerTest, InOrderQueueCrossDepsShortcutFuncs) { event Ev1 = InOrderQueue.memset(buf, 0, sizeof(buf[0])); - event Ev2 = InOrderQueue.single_task>([] {}); + event Ev2 = InOrderQueue.single_task([] {}); { std::unique_lock lk(CvMutex); diff --git a/sycl/unittests/scheduler/RequiredWGSize.cpp b/sycl/unittests/scheduler/RequiredWGSize.cpp index 25c1f175e4e5c..675036ec30081 100644 --- a/sycl/unittests/scheduler/RequiredWGSize.cpp +++ b/sycl/unittests/scheduler/RequiredWGSize.cpp @@ -70,7 +70,7 @@ static void performChecks() { auto ExecBundle = sycl::build(KernelBundle); Queue.submit([&](sycl::handler &CGH) { CGH.use_kernel_bundle(ExecBundle); - CGH.single_task>([] {}); // Actual kernel does not matter + CGH.single_task([] {}); // Actual kernel does not matter }); EXPECT_EQ(KernelGetGroupInfoCalled, true); diff --git a/sycl/unittests/stream/stream.cpp b/sycl/unittests/stream/stream.cpp index 790831862ae12..84027bedd5827 100644 --- a/sycl/unittests/stream/stream.cpp +++ b/sycl/unittests/stream/stream.cpp @@ -53,7 +53,7 @@ TEST(Stream, TestStreamConstructorExceptionNoAllocation) { FAIL() << "Unexpected exception was thrown."; } - CGH.single_task>([=]() {}); + CGH.single_task([=]() {}); }); ASSERT_EQ(GBufferCreateCounter, 0u) << "Buffers were unexpectedly created."; @@ -77,7 +77,7 @@ TEST(Stream, Properties) { FAIL() << "Unexpected exception was thrown."; } - CGH.single_task>([=]() {}); + CGH.single_task([=]() {}); }) .wait(); } diff --git a/sycl/unittests/xpti_trace/NodeCreation.cpp b/sycl/unittests/xpti_trace/NodeCreation.cpp index 6adba2fabff9d..1ace648eb1cc3 100644 --- a/sycl/unittests/xpti_trace/NodeCreation.cpp +++ b/sycl/unittests/xpti_trace/NodeCreation.cpp @@ -53,7 +53,6 @@ class NodeCreation : public ::testing::Test { static constexpr int ColumnNumber = 13; const sycl::detail::code_location TestCodeLocation = { FileName, FunctionName, LineNumber, ColumnNumber}; - static constexpr size_t KernelSize = 1; }; TEST_F(NodeCreation, QueueParallelForWithGraphNode) { @@ -63,7 +62,7 @@ TEST_F(NodeCreation, QueueParallelForWithGraphNode) { Q.submit( [&](handler &Cgh) { sycl::accessor acc(buf, Cgh, sycl::read_write); - Cgh.parallel_for>(1, [=](sycl::id<1> idx) {}); + Cgh.parallel_for(1, [=](sycl::id<1> idx) {}); }, TestCodeLocation); } catch (sycl::exception &e) { @@ -80,7 +79,7 @@ TEST_F(NodeCreation, QueueParallelForWithGraphNode) { TEST_F(NodeCreation, QueueParallelForWithNoGraphNode) { sycl::queue Q; try { - Q.parallel_for>(1, [=](sycl::id<1> idx) {}); + Q.parallel_for(1, [=](sycl::id<1> idx) {}); } catch (sycl::exception &e) { std::ignore = e; } @@ -101,7 +100,7 @@ TEST_F(NodeCreation, QueueParallelForWithUserCodeLoc) { Q.submit( [&](handler &Cgh) { sycl::accessor acc(buf, Cgh, sycl::read_write); - Cgh.parallel_for>(1, [=](sycl::id<1> idx) {}); + Cgh.parallel_for(1, [=](sycl::id<1> idx) {}); }, TestCodeLocation); } catch (sycl::exception &e) { @@ -158,7 +157,7 @@ TEST_F(NodeCreation, CommandGraphRecord) { sycl::detail::tls_code_loc_t myLoc( {"LOCAL_CODELOC_FILE", "LOCAL_CODELOC_NAME", 1, 1}); Q.submit([&](handler &Cgh) { - Cgh.parallel_for>(1, [=](sycl::id<1> idx) {}); + Cgh.parallel_for(1, [=](sycl::id<1> idx) {}); }); } @@ -196,7 +195,7 @@ TEST_F(NodeCreation, CommandGraphAddAPI) { auto doAddNode = [&](const sycl::detail::code_location &loc) { sycl::detail::tls_code_loc_t codeLoc(loc); return cmdGraph.add([&](handler &Cgh) { - Cgh.parallel_for>(1, [=](sycl::id<1> idx) {}); + Cgh.parallel_for(1, [=](sycl::id<1> idx) {}); }); }; diff --git a/sycl/unittests/xpti_trace/QueueApiFailures.cpp b/sycl/unittests/xpti_trace/QueueApiFailures.cpp index 3d3be3a44e050..9ff5a7998417b 100644 --- a/sycl/unittests/xpti_trace/QueueApiFailures.cpp +++ b/sycl/unittests/xpti_trace/QueueApiFailures.cpp @@ -78,8 +78,7 @@ class QueueApiFailures : public ::testing::Test { FileName, FunctionName, LineNumber, ColumnNumber}; const sycl::detail::code_location ExtraTestCodeLocation = { FileName, ExtraFunctionName, ExtraLineNumber, ColumnNumber}; - static constexpr size_t KernelSize = 1; - using TestKI = detail::KernelInfo>; + using TestKI = detail::KernelInfo; const std::string TestCodeLocationMessage = BuildCodeLocationMessage( FileName, FunctionName, LineNumber, ColumnNumber); @@ -102,7 +101,7 @@ TEST_F(QueueApiFailures, QueueSubmit) { try { Q.submit( [&](handler &Cgh) { - Cgh.single_task>([=]() {}); + Cgh.single_task([=]() {}); }, TestCodeLocation); } catch (sycl::exception &e) { @@ -127,7 +126,7 @@ TEST_F(QueueApiFailures, QueueSingleTask) { sycl::queue Q; bool ExceptionCaught = false; try { - Q.single_task>([=]() {}, TestCodeLocation); + Q.single_task([=]() {}, TestCodeLocation); } catch (sycl::exception &e) { std::ignore = e; ExceptionCaught = true; @@ -331,7 +330,7 @@ TEST_F(QueueApiFailures, QueueParallelFor) { bool ExceptionCaught = false; const int globalWIs{512}; try { - Q.parallel_for>(globalWIs, [=](sycl::id<1> idx) {}); + Q.parallel_for(globalWIs, [=](sycl::id<1> idx) {}); } catch (sycl::exception &e) { std::ignore = e; ExceptionCaught = true; @@ -363,7 +362,7 @@ TEST_F(QueueApiFailures, QueueHostTaskWaitFail) { event EventToDepend; try { EventToDepend = - Q.single_task>([=]() {}, TestCodeLocation); + Q.single_task([=]() {}, TestCodeLocation); } catch (sycl::exception &e) { std::ignore = e; ExceptionCaught = true; @@ -404,7 +403,7 @@ TEST_F(QueueApiFailures, QueueHostTaskFail) { const std::string HostTaskExeptionStr = "Host task exception"; try { EventToDepend = - Q.single_task>([=]() {}, TestCodeLocation); + Q.single_task([=]() {}, TestCodeLocation); } catch (sycl::exception &e) { std::ignore = e; ExceptionCaught = true; @@ -488,7 +487,7 @@ TEST_F(QueueApiFailures, QueueKernelAsync) { Q.submit( [&](handler &Cgh) { Cgh.depends_on(EventToDepend); - Cgh.single_task>([=]() {}); + Cgh.single_task([=]() {}); }, ExtraTestCodeLocation); } catch (sycl::exception &e) { diff --git a/sycl/unittests/xpti_trace/QueueIDCheck.cpp b/sycl/unittests/xpti_trace/QueueIDCheck.cpp index 5dab29a603b3c..71f5559fa89d9 100644 --- a/sycl/unittests/xpti_trace/QueueIDCheck.cpp +++ b/sycl/unittests/xpti_trace/QueueIDCheck.cpp @@ -124,7 +124,7 @@ TEST_F(QueueID, QueueCreationAndKernelWithDeps) { Q1.submit( [&](handler &Cgh) { sycl::accessor acc(buf, Cgh, sycl::read_write); - Cgh.parallel_for>(1, [=](sycl::id<1> idx) {}); + Cgh.parallel_for(1, [=](sycl::id<1> idx) {}); }, {FileName, FunctionName, 1, 0}) .wait(); @@ -173,7 +173,7 @@ TEST_F(QueueID, QueueCreationAndKernelNoDeps) { Q0.submit( [&](handler &Cgh) { - Cgh.parallel_for>(1, [=](sycl::id<1> idx) {}); + Cgh.parallel_for(1, [=](sycl::id<1> idx) {}); }, {FileName, FunctionName, 2, 0}) .wait(); @@ -181,7 +181,7 @@ TEST_F(QueueID, QueueCreationAndKernelNoDeps) { Q1.submit( [&](handler &Cgh) { - Cgh.parallel_for>(1, [=](sycl::id<1> idx) {}); + Cgh.parallel_for(1, [=](sycl::id<1> idx) {}); }, {FileName, FunctionName, 3, 0}) .wait(); From a4b74a52e16880dce79b943503ea9a1ce40e461d Mon Sep 17 00:00:00 2001 From: Sergey Semenov Date: Tue, 1 Jul 2025 11:45:43 -0700 Subject: [PATCH 08/12] Apply clang-format --- .../Extensions/CommandGraph/Exceptions.cpp | 8 +++----- .../Extensions/CommandGraph/MultiThreaded.cpp | 4 ++-- .../Extensions/EnqueueFunctionsEvents.cpp | 2 +- sycl/unittests/helpers/TestKernel.hpp | 17 +++++++++-------- sycl/unittests/queue/Barrier.cpp | 12 ++++-------- sycl/unittests/queue/GetProfilingInfo.cpp | 4 ++-- sycl/unittests/scheduler/InOrderQueueDeps.cpp | 3 +-- sycl/unittests/xpti_trace/QueueApiFailures.cpp | 13 ++++--------- 8 files changed, 26 insertions(+), 37 deletions(-) diff --git a/sycl/unittests/Extensions/CommandGraph/Exceptions.cpp b/sycl/unittests/Extensions/CommandGraph/Exceptions.cpp index 6ff9bcea22306..65fcf10153d8c 100644 --- a/sycl/unittests/Extensions/CommandGraph/Exceptions.cpp +++ b/sycl/unittests/Extensions/CommandGraph/Exceptions.cpp @@ -653,8 +653,8 @@ TEST_F(CommandGraphTest, ClusterLaunchException) { Graph.begin_recording(Queue); auto Event1 = Queue.submit([&](sycl::handler &cgh) { cgh.parallel_for(sycl::nd_range<1>({4096}, {32}), - cluster_launch_property, - [&](sycl::nd_item<1> it) {}); + cluster_launch_property, + [&](sycl::nd_item<1> it) {}); }); Queue.wait(); Graph.end_recording(Queue); @@ -750,9 +750,7 @@ TEST_F(CommandGraphTest, RecordingWrongGraphDep) { TEST_F(CommandGraphTest, DynamicCommandGroupWrongGraph) { experimental::command_graph Graph1{Queue.get_context(), Queue.get_device()}; experimental::command_graph Graph2{Queue.get_context(), Queue.get_device()}; - auto CGF = [&](sycl::handler &CGH) { - CGH.single_task([]() {}); - }; + auto CGF = [&](sycl::handler &CGH) { CGH.single_task([]() {}); }; experimental::dynamic_command_group DynCG(Graph2, {CGF}); ASSERT_THROW(Graph1.add(DynCG), sycl::exception); diff --git a/sycl/unittests/Extensions/CommandGraph/MultiThreaded.cpp b/sycl/unittests/Extensions/CommandGraph/MultiThreaded.cpp index f1e1970397a5d..e5dee61621b32 100644 --- a/sycl/unittests/Extensions/CommandGraph/MultiThreaded.cpp +++ b/sycl/unittests/Extensions/CommandGraph/MultiThreaded.cpp @@ -63,8 +63,8 @@ void runKernelsInOrder(queue Q) { /// @param G Modifiable graph to add commands to. void addKernels( experimental::command_graph G) { - auto NodeA = G.add( - [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); + auto NodeA = + G.add([&](sycl::handler &cgh) { cgh.single_task([]() {}); }); auto NodeB = G.add([&](sycl::handler &cgh) { cgh.single_task([]() {}); }, {experimental::property::node::depends_on(NodeA)}); diff --git a/sycl/unittests/Extensions/EnqueueFunctionsEvents.cpp b/sycl/unittests/Extensions/EnqueueFunctionsEvents.cpp index 8de4e4082987f..a1014adbff686 100644 --- a/sycl/unittests/Extensions/EnqueueFunctionsEvents.cpp +++ b/sycl/unittests/Extensions/EnqueueFunctionsEvents.cpp @@ -172,7 +172,7 @@ TEST_F(EnqueueFunctionsEventsTests, NDLaunchShortcutNoEvent) { &redefined_urEnqueueKernelLaunch); oneapiext::nd_launch(Q, nd_range<1>{range<1>{32}, range<1>{32}}, - [](nd_item<1>) {}); + [](nd_item<1>) {}); ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); } diff --git a/sycl/unittests/helpers/TestKernel.hpp b/sycl/unittests/helpers/TestKernel.hpp index afdc5c52780ad..85e6f28c5f673 100644 --- a/sycl/unittests/helpers/TestKernel.hpp +++ b/sycl/unittests/helpers/TestKernel.hpp @@ -14,13 +14,11 @@ class TestKernel; class TestKernelWithAcc; - namespace sycl { inline namespace _V1 { namespace detail { template <> -struct KernelInfo - : public unittest::MockKernelInfoBase { +struct KernelInfo : public unittest::MockKernelInfoBase { static constexpr const char *getName() { return "TestKernel"; } static constexpr int64_t getKernelSize() { return 1; } static constexpr const char *getFileName() { return "TestKernel.hpp"; } @@ -32,10 +30,12 @@ struct KernelInfo }; template <> -struct KernelInfo - : public unittest::MockKernelInfoBase { +struct KernelInfo : public unittest::MockKernelInfoBase { static constexpr const char *getName() { return "TestKernelWithAcc"; } - static constexpr int64_t getKernelSize() { return sizeof(sycl::accessor); } + static constexpr int64_t getKernelSize() { + return sizeof(sycl::accessor); + } static constexpr const char *getFileName() { return "TestKernel.hpp"; } static constexpr const char *getFunctionName() { return "TestKernelWithAccFunctionName"; @@ -48,6 +48,7 @@ struct KernelInfo } // namespace _V1 } // namespace sycl -static sycl::unittest::MockDeviceImage Imgs[] = {sycl::unittest::generateDefaultImage({"TestKernel"}), - sycl::unittest::generateDefaultImage({"TestKernelWithAcc"})}; +static sycl::unittest::MockDeviceImage Imgs[] = { + sycl::unittest::generateDefaultImage({"TestKernel"}), + sycl::unittest::generateDefaultImage({"TestKernelWithAcc"})}; static sycl::unittest::MockDeviceImageArray<2> ImgArray{Imgs}; diff --git a/sycl/unittests/queue/Barrier.cpp b/sycl/unittests/queue/Barrier.cpp index 94b126eee5cb4..b13d703085fc4 100644 --- a/sycl/unittests/queue/Barrier.cpp +++ b/sycl/unittests/queue/Barrier.cpp @@ -28,10 +28,8 @@ TEST(Queue, HandlerBarrier) { sycl::queue Q; - Q.submit( - [&](sycl::handler &cgh) { cgh.single_task([=]() {}); }); - Q.submit( - [&](sycl::handler &cgh) { cgh.single_task([=]() {}); }); + Q.submit([&](sycl::handler &cgh) { cgh.single_task([=]() {}); }); + Q.submit([&](sycl::handler &cgh) { cgh.single_task([=]() {}); }); Q.submit([&](sycl::handler &cgh) { cgh.ext_oneapi_barrier(); }); @@ -47,10 +45,8 @@ TEST(Queue, ExtOneAPISubmitBarrier) { sycl::queue Q; - Q.submit( - [&](sycl::handler &cgh) { cgh.single_task([=]() {}); }); - Q.submit( - [&](sycl::handler &cgh) { cgh.single_task([=]() {}); }); + Q.submit([&](sycl::handler &cgh) { cgh.single_task([=]() {}); }); + Q.submit([&](sycl::handler &cgh) { cgh.single_task([=]() {}); }); Q.ext_oneapi_submit_barrier(); diff --git a/sycl/unittests/queue/GetProfilingInfo.cpp b/sycl/unittests/queue/GetProfilingInfo.cpp index cab6cd3179918..31c403844e581 100644 --- a/sycl/unittests/queue/GetProfilingInfo.cpp +++ b/sycl/unittests/queue/GetProfilingInfo.cpp @@ -311,8 +311,8 @@ TEST(GetProfilingInfo, queue Queue{Ctx, Dev}; DeviceTimerCalled = false; - event E = Queue.submit( - [&](handler &cgh) { cgh.single_task([]() {}); }); + event E = + Queue.submit([&](handler &cgh) { cgh.single_task([]() {}); }); EXPECT_FALSE(DeviceTimerCalled); } diff --git a/sycl/unittests/scheduler/InOrderQueueDeps.cpp b/sycl/unittests/scheduler/InOrderQueueDeps.cpp index da306e5899632..dc9bd42de84a7 100644 --- a/sycl/unittests/scheduler/InOrderQueueDeps.cpp +++ b/sycl/unittests/scheduler/InOrderQueueDeps.cpp @@ -96,8 +96,7 @@ ur_result_t redefinedEnqueueEventsWaitWithBarrierExt(void *pParams) { } sycl::event submitKernel(sycl::queue &Q) { - return Q.submit( - [&](handler &cgh) { cgh.single_task([]() {}); }); + return Q.submit([&](handler &cgh) { cgh.single_task([]() {}); }); } TEST_F(SchedulerTest, InOrderQueueIsolatedDeps) { diff --git a/sycl/unittests/xpti_trace/QueueApiFailures.cpp b/sycl/unittests/xpti_trace/QueueApiFailures.cpp index 9ff5a7998417b..1e88143774e21 100644 --- a/sycl/unittests/xpti_trace/QueueApiFailures.cpp +++ b/sycl/unittests/xpti_trace/QueueApiFailures.cpp @@ -99,11 +99,8 @@ TEST_F(QueueApiFailures, QueueSubmit) { sycl::queue Q; bool ExceptionCaught = false; try { - Q.submit( - [&](handler &Cgh) { - Cgh.single_task([=]() {}); - }, - TestCodeLocation); + Q.submit([&](handler &Cgh) { Cgh.single_task([=]() {}); }, + TestCodeLocation); } catch (sycl::exception &e) { std::ignore = e; ExceptionCaught = true; @@ -361,8 +358,7 @@ TEST_F(QueueApiFailures, QueueHostTaskWaitFail) { bool ExceptionCaught = false; event EventToDepend; try { - EventToDepend = - Q.single_task([=]() {}, TestCodeLocation); + EventToDepend = Q.single_task([=]() {}, TestCodeLocation); } catch (sycl::exception &e) { std::ignore = e; ExceptionCaught = true; @@ -402,8 +398,7 @@ TEST_F(QueueApiFailures, QueueHostTaskFail) { event EventToDepend; const std::string HostTaskExeptionStr = "Host task exception"; try { - EventToDepend = - Q.single_task([=]() {}, TestCodeLocation); + EventToDepend = Q.single_task([=]() {}, TestCodeLocation); } catch (sycl::exception &e) { std::ignore = e; ExceptionCaught = true; From a91c7a95673662aa51a67b845736c9db2fcfe77c Mon Sep 17 00:00:00 2001 From: Sergey Semenov Date: Wed, 16 Jul 2025 08:19:08 -0700 Subject: [PATCH 09/12] Try fixing cleanup --- .../program_manager/program_manager.cpp | 71 ++++++++++--------- 1 file changed, 39 insertions(+), 32 deletions(-) diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 589746ee0a046..32079034e7a63 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -2184,6 +2184,42 @@ void ProgramManager::removeImages(sycl_device_binaries DeviceBinary) { // Drop the kernel argument mask map m_EliminatedKernelArgMasks.erase(Img); + // Unmap the unique kernel IDs for the offload entries + for (sycl_offload_entry EntriesIt = EntriesB; EntriesIt != EntriesE; + EntriesIt = EntriesIt->Increment()) { + + // Drop entry for service kernel + if (std::strstr(EntriesIt->GetName(), "__sycl_service_kernel__")) { + m_ServiceKernels.erase(EntriesIt->GetName()); + continue; + } + + // Exported device functions won't have a kernel ID + if (m_ExportedSymbolImages.find(EntriesIt->GetName()) != + m_ExportedSymbolImages.end()) { + continue; + } + + // remove everything associated with this KernelName + m_KernelUsesAssert.erase(EntriesIt->GetName()); + m_KernelImplicitLocalArgPos.erase(EntriesIt->GetName()); + + if (auto It = m_KernelName2KernelIDs.find(EntriesIt->GetName()); + It != m_KernelName2KernelIDs.end()) { + m_KernelIDs2BinImage.erase(It->second); + m_KernelName2KernelIDs.erase(It); + } + } + + // Drop reverse mapping + m_BinImg2KernelIDs.erase(Img); + + // Unregister exported symbols (needs to happen after the ID unmap loop) + for (const sycl_device_binary_property &ESProp : + Img->getExportedSymbols()) { + m_ExportedSymbolImages.erase(ESProp->Name); + } + for (const sycl_device_binary_property &VFProp : Img->getVirtualFunctions()) { std::string StrValue = DeviceBinaryProperty(VFProp).asCString(); @@ -2230,46 +2266,17 @@ void ProgramManager::removeImages(sycl_device_binaries DeviceBinary) { } } - // Unmap the unique kernel IDs for the offload entries + // Clean up kernel name based cache instances. Needs to happen after the + // calls to removeAllRelatedEntries above since these instances own + // kernel caches. for (sycl_offload_entry EntriesIt = EntriesB; EntriesIt != EntriesE; EntriesIt = EntriesIt->Increment()) { - - // Drop entry for service kernel - if (std::strstr(EntriesIt->GetName(), "__sycl_service_kernel__")) { - m_ServiceKernels.erase(EntriesIt->GetName()); - continue; - } - - // Exported device functions won't have a kernel ID - if (m_ExportedSymbolImages.find(EntriesIt->GetName()) != - m_ExportedSymbolImages.end()) { - continue; - } - - // remove everything associated with this KernelName - m_KernelUsesAssert.erase(EntriesIt->GetName()); - m_KernelImplicitLocalArgPos.erase(EntriesIt->GetName()); - - if (auto It = m_KernelName2KernelIDs.find(EntriesIt->GetName()); - It != m_KernelName2KernelIDs.end()) { - m_KernelIDs2BinImage.erase(It->second); - m_KernelName2KernelIDs.erase(It); - } if (auto It = m_KernelNameBasedCaches.find(EntriesIt->GetName()); It != m_KernelNameBasedCaches.end()) { m_KernelNameBasedCaches.erase(It); } } - // Drop reverse mapping - m_BinImg2KernelIDs.erase(Img); - - // Unregister exported symbols (needs to happen after the ID unmap loop) - for (const sycl_device_binary_property &ESProp : - Img->getExportedSymbols()) { - m_ExportedSymbolImages.erase(ESProp->Name); - } - m_DeviceImages.erase(DevImgIt); } } From 81a855df588c4235aceae7027c9d820f43309638 Mon Sep 17 00:00:00 2001 From: Sergey Semenov Date: Mon, 21 Jul 2025 03:51:22 -0700 Subject: [PATCH 10/12] Revert "Try fixing cleanup" This reverts commit a91c7a95673662aa51a67b845736c9db2fcfe77c. --- .../program_manager/program_manager.cpp | 71 +++++++++---------- 1 file changed, 32 insertions(+), 39 deletions(-) diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 32079034e7a63..589746ee0a046 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -2184,42 +2184,6 @@ void ProgramManager::removeImages(sycl_device_binaries DeviceBinary) { // Drop the kernel argument mask map m_EliminatedKernelArgMasks.erase(Img); - // Unmap the unique kernel IDs for the offload entries - for (sycl_offload_entry EntriesIt = EntriesB; EntriesIt != EntriesE; - EntriesIt = EntriesIt->Increment()) { - - // Drop entry for service kernel - if (std::strstr(EntriesIt->GetName(), "__sycl_service_kernel__")) { - m_ServiceKernels.erase(EntriesIt->GetName()); - continue; - } - - // Exported device functions won't have a kernel ID - if (m_ExportedSymbolImages.find(EntriesIt->GetName()) != - m_ExportedSymbolImages.end()) { - continue; - } - - // remove everything associated with this KernelName - m_KernelUsesAssert.erase(EntriesIt->GetName()); - m_KernelImplicitLocalArgPos.erase(EntriesIt->GetName()); - - if (auto It = m_KernelName2KernelIDs.find(EntriesIt->GetName()); - It != m_KernelName2KernelIDs.end()) { - m_KernelIDs2BinImage.erase(It->second); - m_KernelName2KernelIDs.erase(It); - } - } - - // Drop reverse mapping - m_BinImg2KernelIDs.erase(Img); - - // Unregister exported symbols (needs to happen after the ID unmap loop) - for (const sycl_device_binary_property &ESProp : - Img->getExportedSymbols()) { - m_ExportedSymbolImages.erase(ESProp->Name); - } - for (const sycl_device_binary_property &VFProp : Img->getVirtualFunctions()) { std::string StrValue = DeviceBinaryProperty(VFProp).asCString(); @@ -2266,17 +2230,46 @@ void ProgramManager::removeImages(sycl_device_binaries DeviceBinary) { } } - // Clean up kernel name based cache instances. Needs to happen after the - // calls to removeAllRelatedEntries above since these instances own - // kernel caches. + // Unmap the unique kernel IDs for the offload entries for (sycl_offload_entry EntriesIt = EntriesB; EntriesIt != EntriesE; EntriesIt = EntriesIt->Increment()) { + + // Drop entry for service kernel + if (std::strstr(EntriesIt->GetName(), "__sycl_service_kernel__")) { + m_ServiceKernels.erase(EntriesIt->GetName()); + continue; + } + + // Exported device functions won't have a kernel ID + if (m_ExportedSymbolImages.find(EntriesIt->GetName()) != + m_ExportedSymbolImages.end()) { + continue; + } + + // remove everything associated with this KernelName + m_KernelUsesAssert.erase(EntriesIt->GetName()); + m_KernelImplicitLocalArgPos.erase(EntriesIt->GetName()); + + if (auto It = m_KernelName2KernelIDs.find(EntriesIt->GetName()); + It != m_KernelName2KernelIDs.end()) { + m_KernelIDs2BinImage.erase(It->second); + m_KernelName2KernelIDs.erase(It); + } if (auto It = m_KernelNameBasedCaches.find(EntriesIt->GetName()); It != m_KernelNameBasedCaches.end()) { m_KernelNameBasedCaches.erase(It); } } + // Drop reverse mapping + m_BinImg2KernelIDs.erase(Img); + + // Unregister exported symbols (needs to happen after the ID unmap loop) + for (const sycl_device_binary_property &ESProp : + Img->getExportedSymbols()) { + m_ExportedSymbolImages.erase(ESProp->Name); + } + m_DeviceImages.erase(DevImgIt); } } From 44ca217fcbff38d60e6d43e6805f6342ce892e78 Mon Sep 17 00:00:00 2001 From: Sergey Semenov Date: Mon, 21 Jul 2025 06:01:21 -0700 Subject: [PATCH 11/12] Add output for debugging --- .../program_manager/program_manager.cpp | 21 +++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 589746ee0a046..cbac262a1d863 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -2167,8 +2167,9 @@ void ProgramManager::removeImages(sycl_device_binaries DeviceBinary) { return; // Acquire lock to read and modify maps for kernel bundles std::lock_guard KernelIDsGuard(m_KernelIDsMutex); - + std::cout << "Removing images" << std::endl; for (int I = 0; I < DeviceBinary->NumDeviceBinaries; I++) { + std::cout << " Binary " << I << std::endl; sycl_device_binary RawImg = &(DeviceBinary->DeviceBinaries[I]); auto DevImgIt = m_DeviceImages.find(RawImg); @@ -2181,9 +2182,11 @@ void ProgramManager::removeImages(sycl_device_binaries DeviceBinary) { RTDeviceBinaryImage *Img = DevImgIt->second.get(); + std::cout << " Erase arg mask" << std::endl; // Drop the kernel argument mask map m_EliminatedKernelArgMasks.erase(Img); + std::cout << " Erase VFSet2Bin" << std::endl; for (const sycl_device_binary_property &VFProp : Img->getVirtualFunctions()) { std::string StrValue = DeviceBinaryProperty(VFProp).asCString(); @@ -2191,11 +2194,13 @@ void ProgramManager::removeImages(sycl_device_binaries DeviceBinary) { m_VFSet2BinImage.erase(SetName); } + std::cout << " Erase device globals" << std::endl; m_DeviceGlobals.eraseEntries(Img); { std::lock_guard HostPipesGuard(m_HostPipesMutex); auto HostPipes = Img->getHostPipes(); + std::cout << " Erase host pipes" << std::endl; for (const sycl_device_binary_property &HostPipe : HostPipes) { if (auto HostPipesIt = m_HostPipes.find(HostPipe->Name); HostPipesIt != m_HostPipes.end()) { @@ -2215,27 +2220,30 @@ void ProgramManager::removeImages(sycl_device_binaries DeviceBinary) { // Purge references to the image in native programs map { std::lock_guard NativeProgramsGuard(MNativeProgramsMutex); - // The map does not keep references to program handles; we can erase the // entry without calling UR release for (auto It = NativePrograms.begin(); It != NativePrograms.end();) { auto CurIt = It++; if (CurIt->second.second == Img) { + std::cout << " Remove all related entries" << std::endl; if (auto ContextImpl = CurIt->second.first.lock()) { ContextImpl->getKernelProgramCache().removeAllRelatedEntries( Img->getImageID()); } + std::cout << " Remove from native programs" << std::endl; NativePrograms.erase(CurIt); } } } // Unmap the unique kernel IDs for the offload entries + std::cout << " Clear entries specific info" << std::endl; for (sycl_offload_entry EntriesIt = EntriesB; EntriesIt != EntriesE; EntriesIt = EntriesIt->Increment()) { // Drop entry for service kernel if (std::strstr(EntriesIt->GetName(), "__sycl_service_kernel__")) { + std::cout << " Erase from service kernels" << std::endl; m_ServiceKernels.erase(EntriesIt->GetName()); continue; } @@ -2243,33 +2251,42 @@ void ProgramManager::removeImages(sycl_device_binaries DeviceBinary) { // Exported device functions won't have a kernel ID if (m_ExportedSymbolImages.find(EntriesIt->GetName()) != m_ExportedSymbolImages.end()) { + std::cout << " Exported function, skip" << std::endl; continue; } // remove everything associated with this KernelName + std::cout << " Erase from uses assert" << std::endl; m_KernelUsesAssert.erase(EntriesIt->GetName()); + std::cout << " Erase from implicitlocalargpos" << std::endl; m_KernelImplicitLocalArgPos.erase(EntriesIt->GetName()); if (auto It = m_KernelName2KernelIDs.find(EntriesIt->GetName()); It != m_KernelName2KernelIDs.end()) { + std::cout << " Erase ID -> Img mapping" << std::endl; m_KernelIDs2BinImage.erase(It->second); + std::cout << " Erase Name -> ID mapping" << std::endl; m_KernelName2KernelIDs.erase(It); } if (auto It = m_KernelNameBasedCaches.find(EntriesIt->GetName()); It != m_KernelNameBasedCaches.end()) { + std::cout << " Erase cache instance" << std::endl; m_KernelNameBasedCaches.erase(It); } } // Drop reverse mapping + std::cout << " Erase reverse mapping" << std::endl; m_BinImg2KernelIDs.erase(Img); + std::cout << " Erase exported symbol images" << std::endl; // Unregister exported symbols (needs to happen after the ID unmap loop) for (const sycl_device_binary_property &ESProp : Img->getExportedSymbols()) { m_ExportedSymbolImages.erase(ESProp->Name); } + std::cout << " Erase from device images" << std::endl; m_DeviceImages.erase(DevImgIt); } } From f59f820761290eca682e6654024a33119fd52284 Mon Sep 17 00:00:00 2001 From: Sergey Semenov Date: Tue, 22 Jul 2025 08:24:52 -0700 Subject: [PATCH 12/12] Always fail dlopen tests --- sycl/test-e2e/SharedLib/use_with_dlopen.cpp | 2 +- sycl/test-e2e/SharedLib/use_with_dlopen_verify_cache.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/test-e2e/SharedLib/use_with_dlopen.cpp b/sycl/test-e2e/SharedLib/use_with_dlopen.cpp index 4d4910efa78e3..c5d9d8fd0f0b4 100644 --- a/sycl/test-e2e/SharedLib/use_with_dlopen.cpp +++ b/sycl/test-e2e/SharedLib/use_with_dlopen.cpp @@ -85,6 +85,6 @@ int main() { run(); #endif - return 0; + return -1; } #endif diff --git a/sycl/test-e2e/SharedLib/use_with_dlopen_verify_cache.cpp b/sycl/test-e2e/SharedLib/use_with_dlopen_verify_cache.cpp index bab8130bbab68..0dfdba721e003 100644 --- a/sycl/test-e2e/SharedLib/use_with_dlopen_verify_cache.cpp +++ b/sycl/test-e2e/SharedLib/use_with_dlopen_verify_cache.cpp @@ -106,6 +106,6 @@ int main() { run(); #endif - return 0; + return -1; } #endif