Skip to content

feature: Complete madvise for System Allocator #845

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 60 additions & 20 deletions level_zero/core/source/context/context_imp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1049,20 +1049,13 @@ ze_result_t ContextImp::setAtomicAccessAttribute(ze_device_handle_t hDevice, con

if (attrEval & ZE_MEMORY_ATOMIC_ATTR_EXP_FLAG_DEVICE_ATOMICS) {
auto deviceAllocCapabilities = memProp.deviceAllocCapabilities;
if (isSharedSystemAlloc) {
deviceAllocCapabilities = memProp.sharedSystemAllocCapabilities;
}

if (!(deviceAllocCapabilities & ZE_MEMORY_ACCESS_CAP_FLAG_ATOMIC)) {
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
mode = NEO::AtomicAccessMode::device;
}
if (attrEval & ZE_MEMORY_ATOMIC_ATTR_EXP_FLAG_HOST_ATOMICS) {
auto hostAllocCapabilities = memProp.hostAllocCapabilities;
if (isSharedSystemAlloc) {
hostAllocCapabilities = memProp.sharedSystemAllocCapabilities;
}
if (!(hostAllocCapabilities & ZE_MEMORY_ACCESS_CAP_FLAG_ATOMIC)) {
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
Expand All @@ -1086,24 +1079,40 @@ ze_result_t ContextImp::setAtomicAccessAttribute(ze_device_handle_t hDevice, con
mode = NEO::AtomicAccessMode::system;
}

if ((attr == 0) || (attrEval & ZE_MEMORY_ATOMIC_ATTR_EXP_FLAG_NO_HOST_ATOMICS) || (attrEval & ZE_MEMORY_ATOMIC_ATTR_EXP_FLAG_NO_ATOMICS) || (attrEval & ZE_MEMORY_ATOMIC_ATTR_EXP_FLAG_NO_DEVICE_ATOMICS) || (attrEval & ZE_MEMORY_ATOMIC_ATTR_EXP_FLAG_NO_SYSTEM_ATOMICS)) {

if (sharedSystemAllocEnabled) {
if (attr == 0) {
if (isSharedSystemAlloc) {
auto sharedSystemAllocCapabilities = memProp.sharedSystemAllocCapabilities;
if (!(sharedSystemAllocCapabilities & ZE_MEMORY_ACCESS_CAP_FLAG_CONCURRENT_ATOMIC)) {
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
} else {
auto deviceAllocCapabilities = memProp.deviceAllocCapabilities;
if (!(deviceAllocCapabilities & ZE_MEMORY_ACCESS_CAP_FLAG_ATOMIC)) {
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
}
}
mode = NEO::AtomicAccessMode::none;
} else if ((attr == 0) || (attrEval & ZE_MEMORY_ATOMIC_ATTR_EXP_FLAG_NO_HOST_ATOMICS) || (attrEval & ZE_MEMORY_ATOMIC_ATTR_EXP_FLAG_NO_ATOMICS) || (attrEval & ZE_MEMORY_ATOMIC_ATTR_EXP_FLAG_NO_DEVICE_ATOMICS) || (attrEval & ZE_MEMORY_ATOMIC_ATTR_EXP_FLAG_NO_SYSTEM_ATOMICS)) {
mode = NEO::AtomicAccessMode::none;
}

if (mode == NEO::AtomicAccessMode::invalid) {
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}

auto memoryManager = device->getDriverHandle()->getMemoryManager();
if (isSharedSystemAlloc) {

DeviceImp *deviceImp = static_cast<DeviceImp *>((L0::Device::fromHandle(hDevice)));
if (sharedSystemAllocEnabled) {
// For BO this feature will be available in the future. Currently only supporting SVM madvise.
if (allocData != nullptr) {
PRINT_DEBUG_STRING(NEO::debugManager.flags.PrintDebugMessages.get(), stderr, "BO madvise not supported");
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
auto unifiedMemoryManager = driverHandle->getSvmAllocsManager();

unifiedMemoryManager->sharedSystemAtomicAccess(*deviceImp->getNEODevice(), mode, ptr, size);

} else {
auto memoryManager = device->getDriverHandle()->getMemoryManager();
auto alloc = allocData->gpuAllocations.getGraphicsAllocation(deviceImp->getRootDeviceIndex());
memoryManager->setAtomicAccess(alloc, size, mode, deviceImp->getRootDeviceIndex());
deviceImp->atomicAccessAllocations[allocData] = attr;
Expand All @@ -1115,18 +1124,49 @@ ze_result_t ContextImp::setAtomicAccessAttribute(ze_device_handle_t hDevice, con
ze_result_t ContextImp::getAtomicAccessAttribute(ze_device_handle_t hDevice, const void *ptr, size_t size, ze_memory_atomic_attr_exp_flags_t *pAttr) {

auto device = Device::fromHandle(hDevice);

auto allocData = device->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(ptr);
if (allocData == nullptr) {
const bool sharedSystemAllocEnabled = device->getNEODevice()->areSharedSystemAllocationsAllowed();

if (allocData == nullptr && !sharedSystemAllocEnabled) {
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}

DeviceImp *deviceImp = static_cast<DeviceImp *>((L0::Device::fromHandle(hDevice)));
if (deviceImp->atomicAccessAllocations.find(allocData) != deviceImp->atomicAccessAllocations.end()) {
*pAttr = deviceImp->atomicAccessAllocations[allocData];
return ZE_RESULT_SUCCESS;

if (sharedSystemAllocEnabled) {
// For BO this feature will be available in the future. Currently only supporting SVM madvise.
if (allocData != nullptr) {
PRINT_DEBUG_STRING(NEO::debugManager.flags.PrintDebugMessages.get(), stderr, "BO madvise not supported");
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}

auto unifiedMemoryManager = driverHandle->getSvmAllocsManager();
auto mode = unifiedMemoryManager->getSharedSystemAtomicAccess(*deviceImp->getNEODevice(), ptr, size);
switch (mode) {
case NEO::AtomicAccessMode::device:
*pAttr = ZE_MEMORY_ATOMIC_ATTR_EXP_FLAG_DEVICE_ATOMICS;
break;
case NEO::AtomicAccessMode::host:
*pAttr = ZE_MEMORY_ATOMIC_ATTR_EXP_FLAG_HOST_ATOMICS;
break;
case NEO::AtomicAccessMode::system:
*pAttr = ZE_MEMORY_ATOMIC_ATTR_EXP_FLAG_SYSTEM_ATOMICS;
break;
case NEO::AtomicAccessMode::none:
*pAttr = 0;
break;
case NEO::AtomicAccessMode::invalid:
default:
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
} else {
if (deviceImp->atomicAccessAllocations.find(allocData) != deviceImp->atomicAccessAllocations.end()) {
*pAttr = deviceImp->atomicAccessAllocations[allocData];
return ZE_RESULT_SUCCESS;
}
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
return ZE_RESULT_SUCCESS;
}

ze_result_t ContextImp::createModule(ze_device_handle_t hDevice,
Expand Down
2 changes: 1 addition & 1 deletion shared/source/memory_manager/memory_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,7 @@ class MemoryManager {
virtual bool prefetchSharedSystemAlloc(const void *ptr, const size_t size, SubDeviceIdsVec &subDeviceIds, uint32_t rootDeviceIndex) { return true; }
virtual bool setAtomicAccess(GraphicsAllocation *gfxAllocation, size_t size, AtomicAccessMode mode, uint32_t rootDeviceIndex) { return true; }
virtual bool setSharedSystemAtomicAccess(const void *ptr, const size_t size, AtomicAccessMode mode, SubDeviceIdsVec &subDeviceIds, uint32_t rootDeviceIndex) { return true; }

// Base implementation of the shared system USM atomic-access query: ignores its arguments and
// always returns AtomicAccessMode::none. DrmMemoryManager declares an override of this method.
virtual AtomicAccessMode getSharedSystemAtomicAccess(const void *ptr, const size_t size, SubDeviceIdsVec &subDeviceIds, uint32_t rootDeviceIndex) { return AtomicAccessMode::none; }
bool isExternalAllocation(AllocationType allocationType);
LocalMemoryUsageBankSelector *getLocalMemoryUsageBankSelector(AllocationType allocationType, uint32_t rootDeviceIndex);

Expand Down
7 changes: 7 additions & 0 deletions shared/source/memory_manager/unified_memory_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1176,6 +1176,13 @@ void SVMAllocsManager::sharedSystemAtomicAccess(Device &device, AtomicAccessMode
memoryManager->setSharedSystemAtomicAccess(ptr, size, mode, subDeviceIds, device.getRootDeviceIndex());
}

// Returns the atomic access mode the memory manager reports for the shared system USM
// range [ptr, ptr + size) on the given device.
AtomicAccessMode SVMAllocsManager::getSharedSystemAtomicAccess(Device &device, const void *ptr, const size_t size) {
    // The query is issued for every sub-device id of this single device (all of its vm_ids).
    auto deviceSubDeviceIds = NEO::SubDevice::getSubDeviceIdsFromDevice(device);
    const auto rootDeviceIndex = device.getRootDeviceIndex();

    const AtomicAccessMode reportedMode = memoryManager->getSharedSystemAtomicAccess(ptr, size, deviceSubDeviceIds, rootDeviceIndex);
    return reportedMode;
}

std::unique_lock<std::mutex> SVMAllocsManager::obtainOwnership() {
return std::unique_lock<std::mutex>(mtxForIndirectAccess);
}
Expand Down
1 change: 1 addition & 0 deletions shared/source/memory_manager/unified_memory_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,7 @@ class SVMAllocsManager {
MOCKABLE_VIRTUAL void prefetchMemory(Device &device, CommandStreamReceiver &commandStreamReceiver, const void *ptr, const size_t size);
void prefetchSVMAllocs(Device &device, CommandStreamReceiver &commandStreamReceiver);
void sharedSystemAtomicAccess(Device &device, AtomicAccessMode mode, const void *ptr, const size_t size);
AtomicAccessMode getSharedSystemAtomicAccess(Device &device, const void *ptr, const size_t size);
std::unique_lock<std::mutex> obtainOwnership();

std::map<CommandStreamReceiver *, InternalAllocationsTracker> indirectAllocationsResidency;
Expand Down
74 changes: 35 additions & 39 deletions shared/source/os_interface/linux/drm_memory_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -299,28 +299,26 @@ bool DrmMemoryManager::setSharedSystemMemAdvise(const void *ptr, const size_t si
auto &drm = this->getDrm(rootDeviceIndex);
auto ioctlHelper = drm.getIoctlHelper();

uint32_t attribute = 0;
uint32_t attribute = ioctlHelper->getPreferredLocationAdvise();
uint64_t param = 0;

uint64_t preferredLocation = 0;
uint64_t policy = 0;

switch (memAdviseOp) {
case MemAdvise::setPreferredLocation:
attribute = ioctlHelper->getPreferredLocationAdvise();
param = (static_cast<uint64_t>(-1) << 32) //-1 as currently not supported and ignored. This will be useful in multi device settings.
| static_cast<uint64_t>(ioctlHelper->getDrmParamValue(DrmParam::memoryClassDevice));
break;
case MemAdvise::clearPreferredLocation:
// Assumes that the default location is VRAM, i.e. 1 == DrmParam::memoryClassDevice
attribute = ioctlHelper->getPreferredLocationAdvise();
param = (static_cast<uint64_t>(-1) << 32) | static_cast<uint64_t>(ioctlHelper->getDrmParamValue(DrmParam::memoryClassDevice));
break;
case MemAdvise::setSystemMemoryPreferredLocation:
attribute = ioctlHelper->getPreferredLocationAdvise();
param = (static_cast<uint64_t>(-1) << 32) | static_cast<uint64_t>(ioctlHelper->getDrmParamValue(DrmParam::memoryClassSystem));
break;
case MemAdvise::clearSystemMemoryPreferredLocation:
attribute = ioctlHelper->getPreferredLocationAdvise();
param = (static_cast<uint64_t>(-1) << 32) | static_cast<uint64_t>(ioctlHelper->getDrmParamValue(DrmParam::memoryClassDevice));
break;
case MemAdvise::clearSystemMemoryPreferredLocation: {
// Assumes that the default location is VRAM, i.e. 1 == DrmParam::memoryAdviseLocationDevice
preferredLocation = static_cast<uint64_t>(ioctlHelper->getDrmParamValue(DrmParam::memoryAdviseLocationDevice));
policy = static_cast<uint64_t>(ioctlHelper->getDrmParamValue(DrmParam::memoryAdviseMigrationPolicyAllPages));
param = (preferredLocation << 32) | policy;
} break;
case MemAdvise::setSystemMemoryPreferredLocation: {
preferredLocation = static_cast<uint64_t>(ioctlHelper->getDrmParamValue(DrmParam::memoryAdviseLocationSystem));
policy = static_cast<uint64_t>(ioctlHelper->getDrmParamValue(DrmParam::memoryAdviseMigrationPolicySystemPages));
param = (preferredLocation << 32) | policy;
} break;
default:
return false;
}
Expand All @@ -341,29 +339,10 @@ bool DrmMemoryManager::setSharedSystemAtomicAccess(const void *ptr, const size_t
auto &drm = this->getDrm(rootDeviceIndex);
auto ioctlHelper = drm.getIoctlHelper();

uint32_t attribute = 0;
uint64_t param = 0;
uint32_t attribute = ioctlHelper->getAtomicAdvise(false);
uint32_t atomicParam = ioctlHelper->getAtomicAccess(mode);

switch (mode) {
case AtomicAccessMode::device:
attribute = ioctlHelper->getAtomicAdvise(false);
param = (static_cast<uint64_t>(ioctlHelper->getDrmParamValue(DrmParam::atomicClassDevice)) << 32);
break;
case AtomicAccessMode::system:
attribute = ioctlHelper->getAtomicAdvise(false);
param = (static_cast<uint64_t>(ioctlHelper->getDrmParamValue(DrmParam::atomicClassGlobal)) << 32);
break;
case AtomicAccessMode::host:
attribute = ioctlHelper->getAtomicAdvise(false);
param = (static_cast<uint64_t>(ioctlHelper->getDrmParamValue(DrmParam::atomicClassSystem)) << 32);
break;
case AtomicAccessMode::none:
attribute = ioctlHelper->getAtomicAdvise(false);
param = (static_cast<uint64_t>(ioctlHelper->getDrmParamValue(DrmParam::atomicClassUndefined)) << 32);
break;
default:
return false;
}
uint64_t param = (static_cast<uint64_t>(atomicParam) << 32) | 0;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we need to do those shifts after getting the value back from ioctlHelper?
IoctlHelper should rather do this translation internally if needed.
Also, what is the point of `| 0`?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

`ioctlHelper->getAtomicAccess(mode)` returns a 32-bit int. It therefore has to be shifted to the left, since param needs to be a 64-bit value containing two 32-bit values. I will remove the `| 0`; it's not needed.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you can change it to 64 bit

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's being used in many other places, especially for legacy i915. That will require major refactoring.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need it anyway, as right now you need to do those shifts, which requires a helper function.


// Apply the shared system USM IOCTL to all the VMs of the device
std::vector<uint32_t> vmIds;
Expand All @@ -377,6 +356,23 @@ bool DrmMemoryManager::setSharedSystemAtomicAccess(const void *ptr, const size_t
return result;
}

// Returns the AtomicAccessMode that the ioctl helper reports for the shared system USM
// range starting at ptr and spanning size bytes. The VM id of every sub-device listed in
// subDeviceIds is looked up on the DRM instance of rootDeviceIndex and passed along with the query.
AtomicAccessMode DrmMemoryManager::getSharedSystemAtomicAccess(const void *ptr, const size_t size, SubDeviceIdsVec &subDeviceIds, uint32_t rootDeviceIndex) {

auto &drm = this->getDrm(rootDeviceIndex);
auto ioctlHelper = drm.getIoctlHelper();

// Collect the VM id of each sub-device so the shared system USM query covers all the VMs of the device
std::vector<uint32_t> vmIds;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please use StackVec

vmIds.reserve(subDeviceIds.size());
for (auto subDeviceId : subDeviceIds) {
vmIds.push_back(drm.getVirtualMemoryAddressSpace(subDeviceId));
}

// The pointer is handed to the helper as a 64-bit address value
auto result = ioctlHelper->getVmSharedSystemAtomicAttribute(reinterpret_cast<uint64_t>(ptr), size, vmIds);

return result;
}

bool DrmMemoryManager::setAtomicAccess(GraphicsAllocation *gfxAllocation, size_t size, AtomicAccessMode mode, uint32_t rootDeviceIndex) {
auto drmAllocation = static_cast<DrmAllocation *>(gfxAllocation);

Expand Down
1 change: 1 addition & 0 deletions shared/source/os_interface/linux/drm_memory_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ class DrmMemoryManager : public MemoryManager {
bool prefetchSharedSystemAlloc(const void *ptr, const size_t size, SubDeviceIdsVec &subDeviceIds, uint32_t rootDeviceIndex) override;
bool setAtomicAccess(GraphicsAllocation *gfxAllocation, size_t size, AtomicAccessMode mode, uint32_t rootDeviceIndex) override;
bool setSharedSystemAtomicAccess(const void *ptr, const size_t size, AtomicAccessMode mode, SubDeviceIdsVec &subDeviceIds, uint32_t rootDeviceIndex) override;
AtomicAccessMode getSharedSystemAtomicAccess(const void *ptr, const size_t size, SubDeviceIdsVec &subDeviceIds, uint32_t rootDeviceIndex) override;
[[nodiscard]] std::unique_lock<std::mutex> acquireAllocLock();
std::vector<GraphicsAllocation *> &getSysMemAllocs();
std::vector<GraphicsAllocation *> &getLocalMemAllocs(uint32_t rootDeviceIndex);
Expand Down
5 changes: 5 additions & 0 deletions shared/source/os_interface/linux/drm_wrappers.h
Original file line number Diff line number Diff line change
Expand Up @@ -324,6 +324,7 @@ enum class DrmIoctl {
dg1GemCreateExt,
gemCreateExt,
gemVmAdvise,
gemVmGetMemRangeAttr,
gemVmPrefetch,
uuidRegister,
uuidUnregister,
Expand Down Expand Up @@ -371,6 +372,10 @@ enum class DrmParam {
execRender,
memoryClassDevice,
memoryClassSystem,
memoryAdviseLocationDevice,
memoryAdviseLocationSystem,
memoryAdviseMigrationPolicyAllPages,
memoryAdviseMigrationPolicySystemPages,
mmapOffsetWb,
mmapOffsetWc,
paramHasPooledEu,
Expand Down
1 change: 1 addition & 0 deletions shared/source/os_interface/linux/ioctl_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,7 @@ class IoctlHelper {
virtual std::optional<MemoryClassInstance> getPreferredLocationRegion(PreferredLocation memoryLocation, uint32_t memoryInstance) = 0;
virtual bool setVmBoAdvise(int32_t handle, uint32_t attribute, void *region) = 0;
virtual bool setVmSharedSystemMemAdvise(uint64_t handle, const size_t size, const uint32_t attribute, const uint64_t param, const std::vector<uint32_t> &vmIds) { return true; }
// Default implementation of the shared system USM atomic-attribute query: ignores its arguments
// and always returns AtomicAccessMode::none.
virtual AtomicAccessMode getVmSharedSystemAtomicAttribute(uint64_t handle, const size_t size, const std::vector<uint32_t> &vmIds) { return AtomicAccessMode::none; }
virtual bool setVmBoAdviseForChunking(int32_t handle, uint64_t start, uint64_t length, uint32_t attribute, void *region) = 0;
virtual bool setVmPrefetch(uint64_t start, uint64_t length, uint32_t region, uint32_t vmId) = 0;
virtual bool setGemTiling(void *setTiling) = 0;
Expand Down
Loading