Skip to content

[DO NOT MERGE] Debug changes #19530

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 18 commits into
base: sycl
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 12 additions & 4 deletions sycl/include/sycl/detail/kernel_name_based_cache.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,27 @@
#pragma once

#include <sycl/detail/export.hpp>
#include <sycl/detail/kernel_name_str_t.hpp>

namespace sycl {
inline namespace _V1 {
namespace detail {

struct KernelNameBasedCacheT;
class KernelNameBasedCacheT;

#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
__SYCL_EXPORT KernelNameBasedCacheT *createKernelNameBasedCache();
#endif
__SYCL_EXPORT KernelNameBasedCacheT *
createKernelNameBasedCache(detail::ABINeutralKernelNameStrRefT MKernelName);

// Retrieves a cache pointer unique to a kernel name type that can be used to
// avoid kernel name based lookup in the runtime.
template <typename KernelName>
KernelNameBasedCacheT *getKernelNameBasedCache() {
static KernelNameBasedCacheT *Instance = createKernelNameBasedCache();
template <typename KernelNameT>
KernelNameBasedCacheT *
getKernelNameBasedCache(detail::ABINeutralKernelNameStrRefT KernelName) {
static KernelNameBasedCacheT *Instance =
createKernelNameBasedCache(KernelName);
return Instance;
}

Expand Down
2 changes: 2 additions & 0 deletions sycl/include/sycl/detail/kernel_name_str_t.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,12 @@ namespace detail {
using KernelNameStrT = std::string_view;
using KernelNameStrRefT = std::string_view;
using ABINeutralKernelNameStrT = detail::string_view;
using ABINeutralKernelNameStrRefT = detail::string_view;
#else
using KernelNameStrT = std::string;
using KernelNameStrRefT = const std::string &;
using ABINeutralKernelNameStrT = detail::string;
using ABINeutralKernelNameStrRefT = const detail::string &;
#endif

inline KernelNameStrT toKernelNameStrT(const ABINeutralKernelNameStrT &str) {
Expand Down
3 changes: 2 additions & 1 deletion sycl/include/sycl/handler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -888,14 +888,15 @@ class __SYCL_EXPORT handler {
constexpr std::string_view KernelNameStr =
detail::getKernelName<KernelName>();
MKernelName = KernelNameStr;
setKernelNameBasedCachePtr(
detail::getKernelNameBasedCache<KernelName>(KernelNameStr));
} else {
// In case w/o the integration header it is necessary to process
// accessors from the list(which are associated with this handler) as
// arguments. We must copy the associated accessors as they are checked
// later during finalize.
setArgsToAssociatedAccessors();
}
setKernelNameBasedCachePtr(detail::getKernelNameBasedCache<KernelName>());

// If the kernel lambda is callable with a kernel_handler argument, manifest
// the associated kernel handler.
Expand Down
1 change: 1 addition & 0 deletions sycl/source/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,7 @@ set(SYCL_COMMON_SOURCES
"detail/kernel_compiler/kernel_compiler_sycl.cpp"
"detail/kernel_impl.cpp"
"detail/kernel_name_based_cache.cpp"
"detail/kernel_name_based_cache_t.cpp"
"detail/kernel_program_cache.cpp"
"detail/memory_manager.cpp"
"detail/pipes.cpp"
Expand Down
4 changes: 4 additions & 0 deletions sycl/source/detail/global_handler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -257,12 +257,14 @@ ThreadPool &GlobalHandler::getHostTaskThreadPool() {
return TP;
}

#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
KernelNameBasedCacheT *GlobalHandler::createKernelNameBasedCache() {
static std::deque<KernelNameBasedCacheT> &KernelNameBasedCaches =
getOrCreate(MKernelNameBasedCaches);
LockGuard LG{MKernelNameBasedCaches.Lock};
return &KernelNameBasedCaches.emplace_back();
}
#endif

void GlobalHandler::releaseDefaultContexts() {
// Release shared-pointers to SYCL objects.
Expand Down Expand Up @@ -392,9 +394,11 @@ void shutdown_late() {
Handler->MScheduler.Inst.reset(nullptr);
Handler->MProgramManager.Inst.reset(nullptr);

#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
// Cache stores handles to the adapter, so clear it before
// releasing adapters.
Handler->MKernelNameBasedCaches.Inst.reset(nullptr);
#endif

// Clear the adapters and reset the instance if it was there.
Handler->unloadAdapters();
Expand Down
8 changes: 7 additions & 1 deletion sycl/source/detail/global_handler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@
#include <sycl/detail/spinlock.hpp>
#include <sycl/detail/util.hpp>

#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
#include <deque>
#endif
#include <memory>
#include <unordered_map>

Expand All @@ -27,7 +29,7 @@ class adapter_impl;
class ods_target_list;
class XPTIRegistry;
class ThreadPool;
struct KernelNameBasedCacheT;
class KernelNameBasedCacheT;

/// Wrapper class for global data structures with non-trivial destructors.
///
Expand Down Expand Up @@ -75,7 +77,9 @@ class GlobalHandler {
ods_target_list &getOneapiDeviceSelectorTargets(const std::string &InitValue);
XPTIRegistry &getXPTIRegistry();
ThreadPool &getHostTaskThreadPool();
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
KernelNameBasedCacheT *createKernelNameBasedCache();
#endif
static void registerStaticVarShutdownHandler();

bool isOkToDefer() const;
Expand Down Expand Up @@ -132,7 +136,9 @@ class GlobalHandler {
InstWithLock<XPTIRegistry> MXPTIRegistry;
// Thread pool for host task and event callbacks execution
InstWithLock<ThreadPool> MHostTaskThreadPool;
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
InstWithLock<std::deque<KernelNameBasedCacheT>> MKernelNameBasedCaches;
#endif
};
} // namespace detail
} // namespace _V1
Expand Down
3 changes: 2 additions & 1 deletion sycl/source/detail/graph/graph_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1587,9 +1587,10 @@ void exec_graph_impl::populateURKernelUpdateStructs(
UrKernel = SyclKernelImpl->getHandleRef();
EliminatedArgMask = SyclKernelImpl->getKernelArgMask();
} else {
assert(ExecCG.MKernelNameBasedCachePtr);
BundleObjs = sycl::detail::ProgramManager::getInstance().getOrCreateKernel(
ContextImpl, DeviceImpl, ExecCG.MKernelName,
ExecCG.MKernelNameBasedCachePtr);
*ExecCG.MKernelNameBasedCachePtr);
UrKernel = BundleObjs->MKernelHandle;
EliminatedArgMask = BundleObjs->MKernelArgMask;
}
Expand Down
9 changes: 9 additions & 0 deletions sycl/source/detail/kernel_name_based_cache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,24 @@
//===----------------------------------------------------------------------===//

#include <detail/global_handler.hpp>
#include <detail/program_manager/program_manager.hpp>
#include <sycl/detail/kernel_name_based_cache.hpp>

namespace sycl {
inline namespace _V1 {
namespace detail {

#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
KernelNameBasedCacheT *createKernelNameBasedCache() {
return GlobalHandler::instance().createKernelNameBasedCache();
}
#endif

KernelNameBasedCacheT *
createKernelNameBasedCache(detail::ABINeutralKernelNameStrRefT KernelName) {
return ProgramManager::getInstance().createKernelNameBasedCache(
KernelName.data());
}

} // namespace detail
} // namespace _V1
Expand Down
56 changes: 56 additions & 0 deletions sycl/source/detail/kernel_name_based_cache_t.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
//==-------------------- kernel_name_based_cache_t.cpp ---------------------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include <detail/kernel_name_based_cache_t.hpp>
#include <detail/program_manager/program_manager.hpp>

namespace sycl {
inline namespace _V1 {
namespace detail {

KernelNameBasedCacheT::KernelNameBasedCacheT(KernelNameStrRefT KernelName) {
init(KernelName);
}

void KernelNameBasedCacheT::init(KernelNameStrRefT KernelName) {
auto &PM = detail::ProgramManager::getInstance();
MUsesAssert = PM.kernelUsesAssert(KernelName);
MImplicitLocalArgPos = PM.kernelImplicitLocalArgPos(KernelName);
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
MInitialized.store(true);
#endif
}

#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
void KernelNameBasedCacheT::initIfNeeded(KernelNameStrRefT KernelName) {
if (!MInitialized.load())
init(KernelName);
}
#endif

FastKernelSubcacheT &KernelNameBasedCacheT::getKernelSubcache() {
assertInitialized();
return MFastKernelSubcache;
}
bool KernelNameBasedCacheT::usesAssert() {
assertInitialized();
return MUsesAssert;
}
const std::optional<int> &KernelNameBasedCacheT::getImplicitLocalArgPos() {
assertInitialized();
return MImplicitLocalArgPos;
}

void KernelNameBasedCacheT::assertInitialized() {
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
assert(MInitialized.load() && "Cache needs to be initialized before use");
#endif
}

} // namespace detail
} // namespace _V1
} // namespace sycl
35 changes: 28 additions & 7 deletions sycl/source/detail/kernel_name_based_cache_t.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include <detail/hashers.hpp>
#include <detail/kernel_arg_mask.hpp>
#include <emhash/hash_table8.hpp>
#include <sycl/detail/kernel_name_str_t.hpp>
#include <sycl/detail/spinlock.hpp>
#include <sycl/detail/ur.hpp>

Expand Down Expand Up @@ -83,13 +84,33 @@ struct FastKernelSubcacheT {
FastKernelSubcacheMutexT Mutex;
};

struct KernelNameBasedCacheT {
FastKernelSubcacheT FastKernelSubcache;
std::optional<bool> UsesAssert;
// Implicit local argument position is represented by an optional int, this
// uses another optional on top of that to represent lazy initialization of
// the cached value.
std::optional<std::optional<int>> ImplicitLocalArgPos;
// This class is used for caching kernel name based information.
// Pointers to instances of this class are stored in header function templates
// as a static variable to avoid repeated runtime lookup overhead.
class KernelNameBasedCacheT {
public:
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
KernelNameBasedCacheT() = default;
#endif
KernelNameBasedCacheT(KernelNameStrRefT KernelName);

void init(KernelNameStrRefT KernelName);
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
void initIfNeeded(KernelNameStrRefT KernelName);
#endif
FastKernelSubcacheT &getKernelSubcache();
bool usesAssert();
const std::optional<int> &getImplicitLocalArgPos();

private:
void assertInitialized();

#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
std::atomic<bool> MInitialized = false;
#endif
FastKernelSubcacheT MFastKernelSubcache;
bool MUsesAssert;
std::optional<int> MImplicitLocalArgPos;
};

} // namespace detail
Expand Down
50 changes: 13 additions & 37 deletions sycl/source/detail/kernel_program_cache.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -221,25 +221,18 @@ class KernelProgramCache {

class FastKernelSubcacheWrapper {
public:
FastKernelSubcacheWrapper(FastKernelSubcacheT *CachePtr,
FastKernelSubcacheWrapper(FastKernelSubcacheT &Subcache,
ur_context_handle_t UrContext)
: MSubcachePtr{CachePtr}, MUrContext{UrContext} {
if (!MSubcachePtr) {
MOwnsSubcache = true;
MSubcachePtr = new FastKernelSubcacheT();
}
}
: MSubcachePtr{&Subcache}, MUrContext{UrContext} {}
FastKernelSubcacheWrapper(const FastKernelSubcacheWrapper &) = delete;
FastKernelSubcacheWrapper(FastKernelSubcacheWrapper &&Other)
: MSubcachePtr{Other.MSubcachePtr}, MOwnsSubcache{Other.MOwnsSubcache},
MUrContext{Other.MUrContext} {
: MSubcachePtr{Other.MSubcachePtr}, MUrContext{Other.MUrContext} {
Other.MSubcachePtr = nullptr;
}
FastKernelSubcacheWrapper &
operator=(const FastKernelSubcacheWrapper &) = delete;
FastKernelSubcacheWrapper &operator=(FastKernelSubcacheWrapper &&Other) {
MSubcachePtr = Other.MSubcachePtr;
MOwnsSubcache = Other.MOwnsSubcache;
MUrContext = Other.MUrContext;
Other.MSubcachePtr = nullptr;
return *this;
Expand All @@ -249,11 +242,6 @@ class KernelProgramCache {
if (!MSubcachePtr)
return;

if (MOwnsSubcache) {
delete MSubcachePtr;
return;
}

// Single subcache might be used by different contexts.
// Remove all entries from the subcache that are associated with the
// current context.
Expand All @@ -269,8 +257,7 @@ class KernelProgramCache {
FastKernelSubcacheT &get() { return *MSubcachePtr; }

private:
FastKernelSubcacheT *MSubcachePtr = nullptr;
bool MOwnsSubcache = false;
FastKernelSubcacheT *MSubcachePtr;
ur_context_handle_t MUrContext = nullptr;
};

Expand Down Expand Up @@ -459,18 +446,9 @@ class KernelProgramCache {

FastKernelCacheValPtr
tryToGetKernelFast(KernelNameStrRefT KernelName, ur_device_handle_t Device,
FastKernelSubcacheT *KernelSubcacheHint) {
FastKernelCacheWriteLockT Lock(MFastKernelCacheMutex);
if (!KernelSubcacheHint) {
auto It = MFastKernelCache.try_emplace(
KernelName,
FastKernelSubcacheWrapper(KernelSubcacheHint, getURContext()));
KernelSubcacheHint = &It.first->second.get();
}

const FastKernelSubcacheEntriesT &SubcacheEntries =
KernelSubcacheHint->Entries;
FastKernelSubcacheReadLockT SubcacheLock{KernelSubcacheHint->Mutex};
FastKernelSubcacheT &KernelSubcache) {
const FastKernelSubcacheEntriesT &SubcacheEntries = KernelSubcache.Entries;
FastKernelSubcacheReadLockT SubcacheLock{KernelSubcache.Mutex};
ur_context_handle_t Context = getURContext();
const FastKernelCacheKeyT RequiredKey(Device, Context);
// Search for the kernel in the subcache.
Expand All @@ -488,7 +466,7 @@ class KernelProgramCache {

void saveKernel(KernelNameStrRefT KernelName, ur_device_handle_t Device,
const FastKernelCacheValPtr &CacheVal,
FastKernelSubcacheT *KernelSubcacheHint) {
FastKernelSubcacheT &KernelSubcache) {
if (SYCLConfig<SYCL_IN_MEM_CACHE_EVICTION_THRESHOLD>::
isProgramCacheEvictionEnabled()) {
// Save kernel in fast cache only if the corresponding program is also
Expand All @@ -508,15 +486,13 @@ class KernelProgramCache {
// if no insertion took place, then some other thread has already inserted
// smth in the cache
traceKernel("Kernel inserted.", KernelName, true);
auto It = MFastKernelCache.try_emplace(
KernelName,
FastKernelSubcacheWrapper(KernelSubcacheHint, getURContext()));
KernelSubcacheHint = &It.first->second.get();
MFastKernelCache.try_emplace(
KernelName, FastKernelSubcacheWrapper(KernelSubcache, getURContext()));

FastKernelSubcacheWriteLockT SubcacheLock{KernelSubcacheHint->Mutex};
FastKernelSubcacheWriteLockT SubcacheLock{KernelSubcache.Mutex};
ur_context_handle_t Context = getURContext();
KernelSubcacheHint->Entries.emplace_back(
FastKernelCacheKeyT(Device, Context), CacheVal);
KernelSubcache.Entries.emplace_back(FastKernelCacheKeyT(Device, Context),
CacheVal);
}

// Expects locked program cache
Expand Down
Loading
Loading