diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 000000000..55bb72f28
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,21 @@
+# Set the default behavior, in case people don't have core.autocrlf set.
+* text=auto
+
+# Explicitly declare text files you want to always be normalized and converted
+# to have LF line endings on checkout.
+*.c text eol=lf
+*.cpp text eol=lf
+*.cc text eol=lf
+*.h text eol=lf
+*.hpp text eol=lf
+*.txt text eol=lf
+
+# Define files to support auto-remove trailing white space
+# Need to run the command below, before add modified file(s) to the staging area
+# git config filter.trimspace.clean 'sed -e "s/[[:space:]]*$//g"'
+*.cpp filter=trimspace
+*.c filter=trimspace
+*.h filter=trimspace
+*.hpp filter=trimspace
+*.md filter=trimspace
+
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 406c4f845..29ed85f88 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,4 +1,4 @@
-# Copyright (C) 2017-2021 Advanced Micro Devices, Inc. All Rights Reserved.
+# Copyright (c) 2017 - 2021 Advanced Micro Devices, Inc. All Rights Reserved.
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -20,317 +20,11 @@
 
 cmake_minimum_required(VERSION 3.5)
 
-project(ROCclr VERSION "1.0.0" LANGUAGES C CXX)
+project(ROCclr)
 
-include(CMakePackageConfigHelpers)
-
-#decide whether .so is to be build or .a
-set(BUILD_SHARED_LIBS ON CACHE BOOL "Build shared library (.so) or not.")
-
-#example command:
-
-#cmake -DOPENCL_DIR=/path to/opencl ..
- -set(ROCCLR_CONFIG_NAME ROCclrConfig.cmake) -set(ROCCLR_TARGETS_NAME rocclr-targets.cmake) -set(ROCCLR_VERSION_NAME rocclr-config-version.cmake) -set(ROCCLR_PACKAGE_PREFIX lib/cmake/rocclr) -set(ROCCLR_PREFIX_CODE) -set(ROCCLR_TARGETS_PATH - "${CMAKE_CURRENT_BINARY_DIR}/${ROCCLR_PACKAGE_PREFIX}/${ROCCLR_TARGETS_NAME}") -set(ROCCLR_VERSION_PATH - "${CMAKE_CURRENT_BINARY_DIR}/${ROCCLR_PACKAGE_PREFIX}/${ROCCLR_VERSION_NAME}") - -# Generate the build-tree package. -configure_file("cmake/${ROCCLR_CONFIG_NAME}.in" - "${ROCCLR_PACKAGE_PREFIX}/${ROCCLR_CONFIG_NAME}" - @ONLY) - -write_basic_package_version_file("${ROCCLR_VERSION_PATH}" - VERSION "${ROCclr_VERSION}" - COMPATIBILITY SameMajorVersion) - -set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake" "${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules") - -# FIXME: Handling legacy custom boolean matching "yes" or "no" -if(DEFINED USE_COMGR_LIBRARY) - if(${USE_COMGR_LIBRARY} MATCHES "yes") - set(USE_COMGR_LIBRARY ON) - elseif(${USE_COMGR_LIBRARY} MATCHES "no") - set(USE_COMGR_LIBRARY OFF) - endif() -endif() - -option(USE_COMGR_LIBRARY "Use comgr library" ON) - -find_package(amd_comgr REQUIRED CONFIG - PATHS - /opt/rocm/ - PATH_SUFFIXES - cmake/amd_comgr - lib/cmake/amd_comgr -) -message(STATUS "Code Object Manager found at ${amd_comgr_DIR}.") - -find_package(hsa-runtime64 1.3 REQUIRED CONFIG - PATHS - /opt/rocm/ - PATH_SUFFIXES - cmake/hsa-runtime64 - lib/cmake/hsa-runtime64 - lib64/cmake/hsa-runtime64 -) -message(STATUS "HSA Runtime found at ${hsa-runtime64_DIR}.") - -if( NOT OPENCL_DIR ) - find_path(OPENCL_INCLUDE_DIR - NAMES OpenCL/cl.h CL/cl.h - PATH_SUFFIXES include opencl/include inc include/x86_64 include/x64 - PATHS /opt/rocm - DOC "OpenCL include header OpenCL/cl.h or CL/cl.h" - ) - - if( NOT OPENCL_INCLUDE_DIR ) - unset(OPENCL_INCLUDE_DIR CACHE) - set(OPENCL_INCLUDE_DIR "" CACHE PATH "" FORCE ) - endif() - message(STATUS "Opencl found at ${OPENCL_INCLUDE_DIR}.") -endif() - - - 
-set(THREADS_PREFER_PTHREAD_FLAG ON) -find_package(Threads REQUIRED) - -set(CMAKE_CXX_STANDARD 14) -set(CMAKE_CXX_STANDARD_REQUIRED ON) -set(CMAKE_CXX_EXTENSIONS OFF) -set(CMAKE_POSITION_INDEPENDENT_CODE ON) - -if((CMAKE_CXX_COMPILER_ID STREQUAL "GNU") OR - (CMAKE_CXX_COMPILER_ID MATCHES "Clang")) - add_definitions( - # Enabling -Wextra or -pedantic will cause - # thousands of warnings. Keep things simple for now. - -Wall - # Makefile build adds -fno-strict-aliasing instead. - -Wno-strict-aliasing - # This one seems impossible to fix for now. - # There are hundreds of instances of unused vars/functions - # throughout the code base. - -Wno-unused-variable) -endif() - -add_definitions(-D__x86_64__ -DOPENCL_MAJOR=2 -DOPENCL_MINOR=1 -DCL_TARGET_OPENCL_VERSION=220 -DATI_OS_LINUX -DATI_ARCH_X86 -DLITTLEENDIAN_CPU -DATI_BITS_64 -DWITH_TARGET_AMDGCN -DOPENCL_EXPORTS -DCL_USE_DEPRECATED_OPENCL_1_0_APIS -DCL_USE_DEPRECATED_OPENCL_1_1_APIS -DCL_USE_DEPRECATED_OPENCL_1_2_APIS -DCL_USE_DEPRECATED_OPENCL_2_0_APIS -DWITH_LIGHTNING_COMPILER) -add_definitions(-DOPENCL_C_MAJOR=2 -DOPENCL_C_MINOR=0) - -set(CMAKE_SHARED_LINKER_FLAGS "-Wl,-Bsymbolic -Wl,--unresolved-symbols=report-all") -set (CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/lib) - -if(CMAKE_BUILD_TYPE MATCHES "^Debug$") - add_definitions(-DDEBUG) -endif() - -if(DEV_LOG_ENABLE MATCHES "yes") - add_definitions(-DDEV_LOG_ENABLE) -endif() - -option(BUILD_LINUXPRO "Build LinuxPro" OFF) -if (BUILD_LINUXPRO) - message(STATUS "Building LinuxPro") - add_definitions(-DROCCLR_DISABLE_PREVEGA) - add_definitions(-DROCCLR_ENABLE_GL_SHARING) -endif() - -option(BUILD_PAL "Build PAL backend" OFF) -if (BUILD_PAL) - add_subdirectory(device/pal) - - add_subdirectory(compiler/sc/HSAIL) -else () - add_subdirectory(device/rocm) -endif() - -set(oclruntime_src - thread/thread.cpp - thread/monitor.cpp - thread/semaphore.cpp - utils/flags.cpp - utils/debug.cpp - device/appprofile.cpp - device/device.cpp - device/hwdebug.cpp - 
device/blitcl.cpp - device/blit.cpp - device/devkernel.cpp - device/devwavelimiter.cpp - device/devprogram.cpp - device/devhcprintf.cpp - device/devhcmessages.cpp - device/devhostcall.cpp - device/comgrctx.cpp - device/hsailctx.cpp - platform/activity.cpp - platform/kernel.cpp - platform/context.cpp - platform/command.cpp - platform/ndrange.cpp - platform/runtime.cpp - platform/memory.cpp - platform/program.cpp - platform/commandqueue.cpp - platform/agent.cpp - os/os_win32.cpp - os/alloc.cpp - os/os.cpp - os/os_posix.cpp - compiler/lib/utils/options.cpp - elf/elf.cpp -) - -add_library(amdrocclr_static STATIC ${oclruntime_src}) - -set_target_properties(amdrocclr_static PROPERTIES POSITION_INDEPENDENT_CODE ON) - -target_include_directories(amdrocclr_static - PUBLIC - $ - $ - $ - $ - $ - $ - $ - $ - $ - $ - # GL and EGL headers. - $ - $ - $) - -if(USE_COMGR_LIBRARY) - # FIXME: This should not be part of the public interface. Downstream - # users need to add these definitions. This should be defined in a - # config header here so other builds don't need to be aware of this. - if(${BUILD_SHARED_LIBS}) - target_compile_definitions(amdrocclr_static PUBLIC USE_COMGR_LIBRARY COMGR_DYN_DLL) - else() - target_compile_definitions(amdrocclr_static PUBLIC USE_COMGR_LIBRARY) - endif() - #Needed here to export as transitive dependency in rocclr-targets.cmake - target_link_libraries(amdrocclr_static PRIVATE amd_comgr) +if (${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}) + message(AUTHOR_WARNING "ROCclr is being built as a standalone project. 
This isn't supported anymore.") endif() -target_link_libraries(amdrocclr_static PUBLIC Threads::Threads) -target_link_libraries(amdrocclr_static PUBLIC dl) - - -if (BUILD_PAL) - target_sources(amdrocclr_static PRIVATE $) - target_link_libraries(amdrocclr_static PRIVATE amdhsaloader) - target_link_libraries(amdrocclr_static PRIVATE pal) - - target_compile_definitions(amdrocclr_static PRIVATE WITH_PAL_DEVICE) - - export(TARGETS amdrocclr_static rocclrpal amdhsaloader amdhsacode pal addrlib vam metrohash cwpack gpuopen ddCore tiny_printf mpack rapidjson - FILE ${ROCCLR_TARGETS_PATH}) -else() - target_compile_definitions(amdrocclr_static PRIVATE WITH_HSA_DEVICE) - - #ROCclr being static lib shall not link hsa-runtime64. - #Needed here to export as transitive dependency in rocclr-targets.cmake - target_link_libraries(amdrocclr_static PRIVATE hsa-runtime64::hsa-runtime64) - target_sources(amdrocclr_static PRIVATE $) - export(TARGETS amdrocclr_static - FILE ${ROCCLR_TARGETS_PATH}) -endif() - -if (UNIX) - set(ROCclr_DEFAULT_INSTALL_PREFIX "/opt/rocm/rocclr") - - find_library(LIBRT rt) - if (LIBRT) - target_link_libraries(amdrocclr_static PUBLIC ${LIBRT}) - endif() - - if (LIBNUMA) - target_link_libraries(amdrocclr_static PUBLIC ${LIBNUMA}) - endif() -endif() -#comment out as it's not available in cmake 3.5 -#if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) -if(NOT CMAKE_INSTALL_PREFIX) - if(CMAKE_BUILD_TYPE MATCHES Debug) - set(CMAKE_INSTALL_PREFIX ${CMAKE_CURRENT_SOURCE_DIR} CACHE PATH "Installation path for rocclr" FORCE) - #elseif(CMAKE_BUILD_TYPE MATCHES Release) - else() - set(CMAKE_INSTALL_PREFIX ${ROCclr_DEFAULT_INSTALL_PREFIX} CACHE PATH "Installation path for rocclr" FORCE) - # message(FATAL_ERROR "Invalid CMAKE_BUILD_TYPE specified. 
Valid values are Debug and Release") - endif() -#endif() -endif() - - -install( TARGETS amdrocclr_static - EXPORT rocclr - ARCHIVE DESTINATION lib - LIBRARY DESTINATION lib - COMPONENT applications) - -install ( - DIRECTORY include/ - DESTINATION include) -install ( - DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/elf" - DESTINATION include - FILES_MATCHING PATTERN "*.h*") -install ( - DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/compiler" - DESTINATION include - FILES_MATCHING PATTERN "*.h*") -install ( - DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/utils" - DESTINATION include - FILES_MATCHING PATTERN "*.h*") -install ( - DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/platform" - DESTINATION include - FILES_MATCHING PATTERN "*.h*") -install ( - DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/thread" - DESTINATION include - FILES_MATCHING PATTERN "*.h*") -install ( - DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/os" - DESTINATION include - FILES_MATCHING PATTERN "*.h*") -install ( - DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/device" - DESTINATION include - FILES_MATCHING PATTERN "*.h*") - -# Generate the install-tree package. -set(ROCCLR_PREFIX_CODE " -# Derive absolute install prefix from config file path. 
-get_filename_component(ROCCLR_PREFIX \"\${CMAKE_CURRENT_LIST_FILE}\" PATH)") -string(REGEX REPLACE "/" ";" count "${ROCCLR_PACKAGE_PREFIX}") -foreach(p ${count}) - set(ROCCLR_PREFIX_CODE "${ROCCLR_PREFIX_CODE} -get_filename_component(ROCCLR_PREFIX \"\${ROCCLR_PREFIX}\" PATH)") -endforeach() - -configure_file("cmake/${ROCCLR_CONFIG_NAME}.in" - "${CMAKE_CURRENT_BINARY_DIR}/${ROCCLR_CONFIG_NAME}.install" - @ONLY) - -install(FILES - "${CMAKE_CURRENT_BINARY_DIR}/${ROCCLR_CONFIG_NAME}.install" - DESTINATION "${ROCCLR_PACKAGE_PREFIX}" - RENAME "${ROCCLR_CONFIG_NAME}") -install(EXPORT rocclr - DESTINATION "${ROCCLR_PACKAGE_PREFIX}" - FILE "${ROCCLR_TARGETS_NAME}") -install(FILES - "${ROCCLR_VERSION_PATH}" - DESTINATION "${ROCCLR_PACKAGE_PREFIX}") +list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake) +include(ROCclr) diff --git a/LICENSE.txt b/LICENSE.txt index 9a1f87d2c..57378c669 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1,4 +1,4 @@ -Copyright (c) 2008-2021 Advanced Micro Devices, Inc. +Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/cmake/FindAMD_HSA_LOADER.cmake b/cmake/FindAMD_HSA_LOADER.cmake new file mode 100644 index 000000000..0413ea327 --- /dev/null +++ b/cmake/FindAMD_HSA_LOADER.cmake @@ -0,0 +1,56 @@ +# Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
+# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
+
+if(AMD_HSA_LOADER_FOUND)
+  return()  # AMD_HSA_LOADER_FOUND is already set, so skip the search below
+endif()
+
+find_path(AMD_LIBELF_INCLUDE_DIR libelf.h
+  HINTS
+    ${AMD_LIBELF_PATH}
+  PATHS
+    ${CMAKE_SOURCE_DIR}/hsail-compiler/lib/loaders/elf/utils/libelf
+    ${CMAKE_SOURCE_DIR}/../hsail-compiler/lib/loaders/elf/utils/libelf
+    ${CMAKE_SOURCE_DIR}/../../hsail-compiler/lib/loaders/elf/utils/libelf
+  NO_DEFAULT_PATH)  # only the HINTS/PATHS listed above are searched
+
+find_path(AMD_HSAIL_INCLUDE_DIR hsa.h
+  HINTS
+    ${AMD_SC_PATH}
+  PATHS
+    ${CMAKE_SOURCE_DIR}/sc
+    ${CMAKE_SOURCE_DIR}/../sc
+    ${CMAKE_SOURCE_DIR}/../../sc
+  PATH_SUFFIXES
+    HSAIL/include)
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(AMD_HSA_LOADER
+  "\nHSA Loader not found"
+  AMD_LIBELF_INCLUDE_DIR AMD_HSAIL_INCLUDE_DIR)
+mark_as_advanced(AMD_LIBELF_INCLUDE_DIR AMD_HSAIL_INCLUDE_DIR)
+
+set(USE_AMD_LIBELF "yes" CACHE STRING "" FORCE)  # set() signature: CACHE <type> <docstring> [FORCE]
+# TODO compiler team requested supporting sp3 disassembly
+set(NO_SI_SP3 "yes" CACHE STRING "" FORCE)
+set(HSAIL_COMPILER_SOURCE_DIR "${AMD_LIBELF_INCLUDE_DIR}/../../../../..")  # five levels above the libelf directory
+add_subdirectory("${AMD_LIBELF_INCLUDE_DIR}" ${CMAKE_CURRENT_BINARY_DIR}/libelf)
+add_subdirectory("${AMD_HSAIL_INCLUDE_DIR}/../ext/libamdhsacode" ${CMAKE_CURRENT_BINARY_DIR}/libamdhsacode)
+add_subdirectory("${AMD_HSAIL_INCLUDE_DIR}/../ext/loader" ${CMAKE_CURRENT_BINARY_DIR}/loader)
diff --git a/cmake/FindAMD_OPENCL.cmake b/cmake/FindAMD_OPENCL.cmake
new file mode 100644
index 000000000..bae08a70c
--- /dev/null
+++ b/cmake/FindAMD_OPENCL.cmake
@@ -0,0 +1,74 @@
+# Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved.
+# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
+
+if(AMD_OPENCL_FOUND)
+  return()  # AMD_OPENCL_FOUND is already set, so skip the search below
+endif()
+
+find_path(AMD_OPENCL_INCLUDE_DIR cl.h
+  HINTS
+    ${AMD_OPENCL_PATH}
+  PATHS
+    # directory name of the gerrit repo checkout
+    ${CMAKE_SOURCE_DIR}/opencl
+    ${CMAKE_SOURCE_DIR}/../opencl
+    ${CMAKE_SOURCE_DIR}/../../opencl
+    # directory name of the github repo checkout
+    ${CMAKE_SOURCE_DIR}/ROCm-OpenCL-Runtime
+    ${CMAKE_SOURCE_DIR}/../ROCm-OpenCL-Runtime
+    ${CMAKE_SOURCE_DIR}/../../ROCm-OpenCL-Runtime
+    # directory names of the jenkins repo checkouts
+    ${CMAKE_SOURCE_DIR}/opencl-on-vdi
+    ${CMAKE_SOURCE_DIR}/../opencl-on-vdi
+    ${CMAKE_SOURCE_DIR}/../../opencl-on-vdi
+    ${CMAKE_SOURCE_DIR}/opencl-on-rocclr
+    ${CMAKE_SOURCE_DIR}/../opencl-on-rocclr
+    ${CMAKE_SOURCE_DIR}/../../opencl-on-rocclr
+  PATH_SUFFIXES
+    khronos/headers/opencl2.2/CL
+  NO_DEFAULT_PATH)  # only the HINTS/PATHS listed above are searched
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(AMD_OPENCL
+  "\nAMD OpenCL not found"
+  AMD_OPENCL_INCLUDE_DIR)
+mark_as_advanced(AMD_OPENCL_INCLUDE_DIR)
+
+set(AMD_OPENCL_DEFS
+  -DHAVE_CL2_HPP
+  -DOPENCL_MAJOR=2
+  -DOPENCL_MINOR=2
+  -DOPENCL_C_MAJOR=2
+  -DOPENCL_C_MINOR=0
+  -DCL_TARGET_OPENCL_VERSION=220
+  -DCL_USE_DEPRECATED_OPENCL_1_0_APIS
+  -DCL_USE_DEPRECATED_OPENCL_1_1_APIS
+  -DCL_USE_DEPRECATED_OPENCL_1_2_APIS
+  -DCL_USE_DEPRECATED_OPENCL_2_0_APIS)
+mark_as_advanced(AMD_OPENCL_DEFS)  # NOTE(review): AMD_OPENCL_DEFS is a normal variable, not a cache entry
+
+set(AMD_OPENCL_INCLUDE_DIRS
+  ${AMD_OPENCL_INCLUDE_DIR}
+  ${AMD_OPENCL_INCLUDE_DIR}/..
+  ${AMD_OPENCL_INCLUDE_DIR}/../..
+  ${AMD_OPENCL_INCLUDE_DIR}/../../..
+  ${AMD_OPENCL_INCLUDE_DIR}/../../../..
+  ${AMD_OPENCL_INCLUDE_DIR}/../../../../amdocl)  # amdocl under the fourth ancestor of the CL header directory
+mark_as_advanced(AMD_OPENCL_INCLUDE_DIRS)
diff --git a/cmake/FindAMD_PAL.cmake b/cmake/FindAMD_PAL.cmake
new file mode 100644
index 000000000..557444458
--- /dev/null
+++ b/cmake/FindAMD_PAL.cmake
@@ -0,0 +1,68 @@
+# Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved.
+# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
+
+if(AMD_PAL_FOUND)
+  return()  # AMD_PAL_FOUND is already set, so skip the search below
+endif()
+
+find_path(AMD_ASIC_REG_INCLUDE_DIR nv_id.h
+  HINTS
+    ${AMD_DRIVERS_PATH}
+  PATHS
+    # directory layout of the p4 repo
+    ${CMAKE_SOURCE_DIR}/drivers
+    ${CMAKE_SOURCE_DIR}/../drivers
+    ${CMAKE_SOURCE_DIR}/../../drivers
+    # directory layout of the github ent repo
+    ${CMAKE_SOURCE_DIR}/drivers/drivers
+    ${CMAKE_SOURCE_DIR}/../drivers/drivers
+    ${CMAKE_SOURCE_DIR}/../../drivers/drivers
+  PATH_SUFFIXES
+    inc/asic_reg)
+
+find_path(AMD_HSAIL_INCLUDE_DIR hsa.h
+  HINTS
+    ${AMD_SC_PATH}
+  PATHS
+    ${CMAKE_SOURCE_DIR}/sc
+    ${CMAKE_SOURCE_DIR}/../sc
+    ${CMAKE_SOURCE_DIR}/../../sc
+  PATH_SUFFIXES
+    HSAIL/include)
+
+find_path(AMD_PAL_INCLUDE_DIR pal.h
+  HINTS
+    ${AMD_PAL_PATH}
+  PATHS
+    ${CMAKE_SOURCE_DIR}/pal
+    ${CMAKE_SOURCE_DIR}/../pal
+    ${CMAKE_SOURCE_DIR}/../../pal
+  PATH_SUFFIXES
+    inc/core)
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(AMD_PAL
+  "\nPAL not found"
+  AMD_ASIC_REG_INCLUDE_DIR AMD_HSAIL_INCLUDE_DIR AMD_PAL_INCLUDE_DIR)
+mark_as_advanced(AMD_ASIC_REG_INCLUDE_DIR AMD_HSAIL_INCLUDE_DIR AMD_PAL_INCLUDE_DIR)
+
+set(GLOBAL_ROOT_SRC_DIR "${AMD_ASIC_REG_INCLUDE_DIR}/../../..")  # three levels above the inc/asic_reg directory
+set(PAL_SC_PATH "${AMD_HSAIL_INCLUDE_DIR}/../..")  # two levels above the HSAIL/include directory
+add_subdirectory("${AMD_PAL_INCLUDE_DIR}/../.." ${CMAKE_CURRENT_BINARY_DIR}/pal)  # NOTE(review): not guarded by AMD_PAL_FOUND
diff --git a/cmake/modules/FindROCR.cmake b/cmake/FindAMD_UGL.cmake
similarity index 58%
rename from cmake/modules/FindROCR.cmake
rename to cmake/FindAMD_UGL.cmake
index 13862f3c1..1246476e9 100644
--- a/cmake/modules/FindROCR.cmake
+++ b/cmake/FindAMD_UGL.cmake
@@ -1,4 +1,4 @@
-# Copyright (C) 2017-2021 Advanced Micro Devices, Inc. All Rights Reserved.
+# Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved.
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -18,19 +18,31 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 # THE SOFTWARE.
 
-# Try to find ROCR (Radeon Open Compute Runtime)
-#
-# Once found, this will define:
-# - ROCR_FOUND - ROCR status (found or not found)
-# - ROCR_INCLUDES - Required ROCR include directories
-# - ROCR_LIBRARIES - Required ROCR libraries
-find_path(FIND_ROCR_INCLUDES hsa.h HINTS /opt/rocm/include /opt/rocm/hsa/include PATH_SUFFIXES hsa)
-find_library(FIND_ROCR_LIBRARIES hsa-runtime64 HINTS /opt/rocm/lib /opt/rocm/hsa/lib)
+if(AMD_UGL_FOUND)
+  return()  # AMD_UGL_FOUND is already set, so skip the search below
+endif()
+
+find_path(AMD_UGL_INCLUDE_DIR GL/glx.h
+  HINTS
+    ${AMD_DRIVERS_PATH}
+  PATHS
+    # directory layout of the p4 repo
+    ${CMAKE_SOURCE_DIR}/drivers
+    ${CMAKE_SOURCE_DIR}/../drivers
+    ${CMAKE_SOURCE_DIR}/../../drivers
+    # directory layout of the github ent repo
+    ${CMAKE_SOURCE_DIR}/drivers/drivers
+    ${CMAKE_SOURCE_DIR}/../drivers/drivers
+    ${CMAKE_SOURCE_DIR}/../../drivers/drivers
+  PATH_SUFFIXES
+    ugl/inc
+  NO_DEFAULT_PATH)  # only the HINTS/PATHS listed above are searched
 
 include(FindPackageHandleStandardArgs)
-find_package_handle_standard_args(ROCR DEFAULT_MSG
-  FIND_ROCR_INCLUDES FIND_ROCR_LIBRARIES)
-mark_as_advanced(FIND_ROCR_INCLUDES FIND_ROCR_LIBRARIES)
+find_package_handle_standard_args(AMD_UGL
+  "\nAMD UGL not found"
+  AMD_UGL_INCLUDE_DIR)
+mark_as_advanced(AMD_UGL_INCLUDE_DIR)
 
-set(ROCR_INCLUDES ${FIND_ROCR_INCLUDES})
-set(ROCR_LIBRARIES ${FIND_ROCR_LIBRARIES})
+set(AMD_UGL_INCLUDE_DIRS ${AMD_UGL_INCLUDE_DIR} ${ROCCLR_SRC_DIR}/device/gpu/gslbe/src/rt)  # relies on ROCCLR_SRC_DIR being set by the including script
+mark_as_advanced(AMD_UGL_INCLUDE_DIRS)  # NOTE(review): AMD_UGL_INCLUDE_DIRS is a normal variable, not a cache entry
diff --git a/cmake/ROCclr.cmake b/cmake/ROCclr.cmake
new file mode 100644
index 000000000..fd09d38b1
--- /dev/null
+++ b/cmake/ROCclr.cmake
@@ -0,0 +1,141 @@
+# Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved.
+# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +cmake_minimum_required(VERSION 3.5) + +# ROCclr abstracts the usage of multiple AMD compilers and runtimes. +# It is possible to support multiple backends concurrently in the same binary. 
+option(ROCCLR_ENABLE_HSAIL "Enable support for HSAIL compiler" OFF)
+option(ROCCLR_ENABLE_LC "Enable support for LC compiler" ON)
+option(ROCCLR_ENABLE_HSA "Enable support for HSA runtime" ON)
+option(ROCCLR_ENABLE_PAL "Enable support for PAL runtime" OFF)
+
+if((NOT ROCCLR_ENABLE_HSAIL) AND (NOT ROCCLR_ENABLE_LC))
+  message(FATAL_ERROR "Support for at least one compiler needs to be enabled!")
+endif()
+
+if((NOT ROCCLR_ENABLE_HSA) AND (NOT ROCCLR_ENABLE_PAL))
+  message(FATAL_ERROR "Support for at least one runtime needs to be enabled!")
+endif()
+
+set(THREADS_PREFER_PTHREAD_FLAG ON)
+find_package(Threads REQUIRED)
+
+find_package(AMD_OPENCL)
+
+add_library(rocclr STATIC)  # sources are attached below with target_sources(); source-less add_library needs CMake >= 3.11
+
+include(ROCclrCompilerOptions)
+
+set(ROCCLR_SRC_DIR "${CMAKE_CURRENT_LIST_DIR}/..")  # parent of the directory holding this script
+mark_as_advanced(ROCCLR_SRC_DIR)
+
+set_target_properties(rocclr PROPERTIES
+  CXX_STANDARD 14
+  CXX_STANDARD_REQUIRED ON
+  CXX_EXTENSIONS OFF
+  POSITION_INDEPENDENT_CODE ON)
+
+target_sources(rocclr PRIVATE
+  ${ROCCLR_SRC_DIR}/compiler/lib/utils/options.cpp
+  ${ROCCLR_SRC_DIR}/device/appprofile.cpp
+  ${ROCCLR_SRC_DIR}/device/blit.cpp
+  ${ROCCLR_SRC_DIR}/device/blitcl.cpp
+  ${ROCCLR_SRC_DIR}/device/comgrctx.cpp
+  ${ROCCLR_SRC_DIR}/device/devhcmessages.cpp
+  ${ROCCLR_SRC_DIR}/device/devhcprintf.cpp
+  ${ROCCLR_SRC_DIR}/device/devhostcall.cpp
+  ${ROCCLR_SRC_DIR}/device/device.cpp
+  ${ROCCLR_SRC_DIR}/device/devkernel.cpp
+  ${ROCCLR_SRC_DIR}/device/devprogram.cpp
+  ${ROCCLR_SRC_DIR}/device/devwavelimiter.cpp
+  ${ROCCLR_SRC_DIR}/device/hsailctx.cpp
+  ${ROCCLR_SRC_DIR}/device/hwdebug.cpp
+  ${ROCCLR_SRC_DIR}/elf/elf.cpp
+  ${ROCCLR_SRC_DIR}/os/alloc.cpp
+  ${ROCCLR_SRC_DIR}/os/os_posix.cpp
+  ${ROCCLR_SRC_DIR}/os/os_win32.cpp
+  ${ROCCLR_SRC_DIR}/os/os.cpp
+  ${ROCCLR_SRC_DIR}/platform/activity.cpp
+  ${ROCCLR_SRC_DIR}/platform/agent.cpp
+  ${ROCCLR_SRC_DIR}/platform/command.cpp
+  ${ROCCLR_SRC_DIR}/platform/commandqueue.cpp
+  ${ROCCLR_SRC_DIR}/platform/context.cpp
+  ${ROCCLR_SRC_DIR}/platform/kernel.cpp
+  ${ROCCLR_SRC_DIR}/platform/memory.cpp
+  ${ROCCLR_SRC_DIR}/platform/ndrange.cpp
+  ${ROCCLR_SRC_DIR}/platform/program.cpp
+  ${ROCCLR_SRC_DIR}/platform/runtime.cpp
+  ${ROCCLR_SRC_DIR}/thread/monitor.cpp
+  ${ROCCLR_SRC_DIR}/thread/semaphore.cpp
+  ${ROCCLR_SRC_DIR}/thread/thread.cpp
+  ${ROCCLR_SRC_DIR}/utils/debug.cpp
+  ${ROCCLR_SRC_DIR}/utils/flags.cpp)
+
+if(WIN32)
+  target_compile_definitions(rocclr PUBLIC ATI_OS_WIN)
+else()
+  target_compile_definitions(rocclr PUBLIC ATI_OS_LINUX)
+
+  # Additional settings for LinuxPro (the option only exists on non-Windows builds)
+  option(BUILD_LINUXPRO "Build LinuxPro" OFF)
+  if(BUILD_LINUXPRO)
+    target_compile_definitions(rocclr PUBLIC
+      ROCCLR_DISABLE_PREVEGA)
+  endif()
+endif()
+
+target_compile_definitions(rocclr PUBLIC
+  ATI_ARCH_X86
+  LITTLEENDIAN_CPU
+  WITH_LIQUID_FLASH=0
+  ${AMD_OPENCL_DEFS})
+
+target_include_directories(rocclr PUBLIC
+  ${ROCCLR_SRC_DIR}
+  ${ROCCLR_SRC_DIR}/compiler/lib
+  ${ROCCLR_SRC_DIR}/compiler/lib/include
+  ${ROCCLR_SRC_DIR}/compiler/lib/backends/common
+  ${ROCCLR_SRC_DIR}/device
+  ${ROCCLR_SRC_DIR}/elf
+  ${ROCCLR_SRC_DIR}/include
+  ${AMD_OPENCL_INCLUDE_DIRS})
+
+target_link_libraries(rocclr PUBLIC Threads::Threads)
+# IPC on Windows is not supported, so rt is only linked on UNIX
+if(UNIX)
+  target_link_libraries(rocclr PUBLIC rt)
+endif()
+
+if(ROCCLR_ENABLE_HSAIL)
+  include(ROCclrHSAIL)
+endif()
+
+if(ROCCLR_ENABLE_LC)
+  include(ROCclrLC)
+endif()
+
+if(ROCCLR_ENABLE_HSA)
+  include(ROCclrHSA)
+endif()
+
+if(ROCCLR_ENABLE_PAL)
+  include(ROCclrPAL)
+endif()
diff --git a/cmake/modules/FindROCT.cmake b/cmake/ROCclrCompilerOptions.cmake
similarity index 58%
rename from cmake/modules/FindROCT.cmake
rename to cmake/ROCclrCompilerOptions.cmake
index 6ad3f59e1..2dae3dab0 100644
--- a/cmake/modules/FindROCT.cmake
+++ b/cmake/ROCclrCompilerOptions.cmake
@@ -1,4 +1,4 @@
-# Copyright (C) 2019-2021 Advanced Micro Devices, Inc. All Rights Reserved.
+# Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved.
# # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -18,19 +18,14 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. -# Try to find ROCT (Radeon Open Compute Thunk) -# -# Once found, this will define: -# - ROCT_FOUND - ROCT status (found or not found) -# - ROCT_INCLUDES - Required ROCT include directories -# - ROCT_LIBRARIES - Required ROCT libraries -find_path(FIND_ROCT_INCLUDES hsakmt.h HINTS /opt/rocm/include) -find_library(FIND_ROCT_LIBRARIES hsakmt HINTS /opt/rocm/lib) - -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(ROCT DEFAULT_MSG - FIND_ROCT_INCLUDES FIND_ROCT_LIBRARIES) -mark_as_advanced(FIND_ROCT_INCLUDES FIND_ROCT_LIBRARIES) +include_guard() -set(ROCT_INCLUDES ${FIND_ROCT_INCLUDES}) -set(ROCT_LIBRARIES ${FIND_ROCT_LIBRARIES}) +if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") + if (CMAKE_VERSION VERSION_LESS "3.20") + # This code is neccessary to avoid this command line warning: + # "Overriding /GR with /GR- cl: command line warning D9025" + # + # /GR is implied by MSVC anyway. So getting rid of it doesn't matter. + string(REPLACE "/GR" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}) + endif() +endif() diff --git a/cmake/ROCclrHSA.cmake b/cmake/ROCclrHSA.cmake new file mode 100644 index 000000000..fb127ed68 --- /dev/null +++ b/cmake/ROCclrHSA.cmake @@ -0,0 +1,52 @@ +# Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
+# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
+
+find_package(hsa-runtime64 1.3 REQUIRED CONFIG
+  PATHS
+    /opt/rocm/
+  PATH_SUFFIXES
+    cmake/hsa-runtime64
+    lib/cmake/hsa-runtime64
+    lib64/cmake/hsa-runtime64)
+target_link_libraries(rocclr PUBLIC hsa-runtime64::hsa-runtime64)
+
+find_package(NUMA QUIET)  # optional dependency: not REQUIRED, and QUIET suppresses the not-found message
+if(NUMA_FOUND)
+  target_compile_definitions(rocclr PUBLIC ROCCLR_SUPPORT_NUMA_POLICY)
+  target_link_libraries(rocclr PUBLIC NUMA)  # NOTE(review): links the bare name NUMA - confirm the NUMA find module defines such a target
+endif()
+
+target_sources(rocclr PRIVATE
+  ${ROCCLR_SRC_DIR}/device/rocm/rocappprofile.cpp
+  ${ROCCLR_SRC_DIR}/device/rocm/rocblit.cpp
+  ${ROCCLR_SRC_DIR}/device/rocm/roccounters.cpp
+  ${ROCCLR_SRC_DIR}/device/rocm/rocdevice.cpp
+  ${ROCCLR_SRC_DIR}/device/rocm/rocglinterop.cpp
+  ${ROCCLR_SRC_DIR}/device/rocm/rockernel.cpp
+  ${ROCCLR_SRC_DIR}/device/rocm/rocmemory.cpp
+  ${ROCCLR_SRC_DIR}/device/rocm/rocprintf.cpp
+  ${ROCCLR_SRC_DIR}/device/rocm/rocprogram.cpp
+  ${ROCCLR_SRC_DIR}/device/rocm/rocschedcl.cpp
+  ${ROCCLR_SRC_DIR}/device/rocm/rocsettings.cpp
+  ${ROCCLR_SRC_DIR}/device/rocm/rocsignal.cpp
+  ${ROCCLR_SRC_DIR}/device/rocm/rocvirtual.cpp
+  ${ROCCLR_SRC_DIR}/device/rocm/rocurilocator.cpp)
+
+target_compile_definitions(rocclr PUBLIC WITH_HSA_DEVICE)  # PUBLIC: also propagated to targets that link rocclr
diff --git a/cmake/ROCclrConfig.cmake.in b/cmake/ROCclrHSAIL.cmake
similarity index 79%
rename from cmake/ROCclrConfig.cmake.in
rename to cmake/ROCclrHSAIL.cmake
index 17ef4427b..24af48413 100644
--- a/cmake/ROCclrConfig.cmake.in
+++ b/cmake/ROCclrHSAIL.cmake
@@ -1,4 +1,4 @@
-# Copyright (C) 2017-2021 Advanced Micro Devices, Inc. All Rights Reserved.
+# Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved.
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -18,10 +18,4 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 # THE SOFTWARE.
 
-@ROCCLR_PREFIX_CODE@
-
-include( CMakeFindDependencyMacro )
-find_dependency(hsa-runtime64)
-find_dependency(amd_comgr)
-message(STATUS "ROCclr at ${ROCclr_DIR}")
-include("${ROCclr_DIR}/@ROCCLR_TARGETS_NAME@")
\ No newline at end of file
+target_compile_definitions(rocclr PUBLIC WITH_COMPILER_LIB HSAIL_DYN_DLL)  # ROCclr.cmake includes this file only when ROCCLR_ENABLE_HSAIL is ON
diff --git a/cmake/ROCclrLC.cmake b/cmake/ROCclrLC.cmake
new file mode 100644
index 000000000..cb78fa241
--- /dev/null
+++ b/cmake/ROCclrLC.cmake
@@ -0,0 +1,32 @@
+# Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+ +find_package(amd_comgr REQUIRED CONFIG + PATHS + /opt/rocm/ + PATH_SUFFIXES + cmake/amd_comgr + lib/cmake/amd_comgr) + +target_compile_definitions(rocclr PUBLIC WITH_LIGHTNING_COMPILER USE_COMGR_LIBRARY) +if(BUILD_SHARED_LIBS) + target_compile_definitions(rocclr PUBLIC COMGR_DYN_DLL) +endif() +target_link_libraries(rocclr PUBLIC amd_comgr) diff --git a/cmake/ROCclrPAL.cmake b/cmake/ROCclrPAL.cmake new file mode 100644 index 000000000..304a42566 --- /dev/null +++ b/cmake/ROCclrPAL.cmake @@ -0,0 +1,82 @@ +# Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
+ +file(STRINGS ${ROCCLR_SRC_DIR}/device/pal/palcdefs PAL_MAJOR_VERSION REGEX "^PAL_MAJOR_VERSION = [0-9]+") +string(REGEX REPLACE "PAL_MAJOR_VERSION = " "" PAL_MAJOR_VERSION ${PAL_MAJOR_VERSION}) + +file(STRINGS ${ROCCLR_SRC_DIR}/device/pal/palcdefs GPUOPEN_MAJOR_VERSION REGEX "^GPUOPEN_MAJOR_VERSION = [0-9]+") +string(REGEX REPLACE "GPUOPEN_MAJOR_VERSION = " "" GPUOPEN_MAJOR_VERSION ${GPUOPEN_MAJOR_VERSION}) + +set(PAL_CLIENT "OCL") + +set(PAL_CLIENT_INTERFACE_MAJOR_VERSION ${PAL_MAJOR_VERSION}) +set(GPUOPEN_CLIENT_INTERFACE_MAJOR_VERSION ${GPUOPEN_MAJOR_VERSION}) +set(GPUOPEN_CLIENT_INTERFACE_MINOR_VERSION 0) + +set(PAL_CLOSED_SOURCE ON) +set(PAL_DEVELOPER_BUILD OFF) +set(PAL_BUILD_GPUOPEN ON) +set(PAL_BUILD_SCPC OFF) +set(PAL_BUILD_VIDEO OFF) +set(PAL_BUILD_DTIF OFF) +set(PAL_BUILD_OSS ON) +set(PAL_BUILD_SECURITY OFF) +set(PAL_SPPAP_CLOSED_SOURCE OFF) +set(PAL_BUILD_GFX ON) +set(PAL_BUILD_NULL_DEVICE OFF) +set(PAL_BUILD_GFX6 ON) +set(PAL_BUILD_GFX9 ON) + +find_package(AMD_PAL) +find_package(AMD_HSA_LOADER) +find_package(AMD_UGL) + +target_sources(rocclr PRIVATE + ${ROCCLR_SRC_DIR}/device/pal/palappprofile.cpp + ${ROCCLR_SRC_DIR}/device/pal/palblit.cpp + ${ROCCLR_SRC_DIR}/device/pal/palconstbuf.cpp + ${ROCCLR_SRC_DIR}/device/pal/palcounters.cpp + ${ROCCLR_SRC_DIR}/device/pal/paldebugmanager.cpp + ${ROCCLR_SRC_DIR}/device/pal/paldevice.cpp + ${ROCCLR_SRC_DIR}/device/pal/paldeviced3d10.cpp + ${ROCCLR_SRC_DIR}/device/pal/paldeviced3d11.cpp + ${ROCCLR_SRC_DIR}/device/pal/paldeviced3d9.cpp + ${ROCCLR_SRC_DIR}/device/pal/paldevicegl.cpp + ${ROCCLR_SRC_DIR}/device/pal/palgpuopen.cpp + ${ROCCLR_SRC_DIR}/device/pal/palkernel.cpp + ${ROCCLR_SRC_DIR}/device/pal/palmemory.cpp + ${ROCCLR_SRC_DIR}/device/pal/palprintf.cpp + ${ROCCLR_SRC_DIR}/device/pal/palprogram.cpp + ${ROCCLR_SRC_DIR}/device/pal/palresource.cpp + ${ROCCLR_SRC_DIR}/device/pal/palschedcl.cpp + ${ROCCLR_SRC_DIR}/device/pal/palsettings.cpp + ${ROCCLR_SRC_DIR}/device/pal/palsignal.cpp + 
${ROCCLR_SRC_DIR}/device/pal/palthreadtrace.cpp + ${ROCCLR_SRC_DIR}/device/pal/paltimestamp.cpp + ${ROCCLR_SRC_DIR}/device/pal/palvirtual.cpp) + +target_compile_definitions(rocclr PUBLIC WITH_PAL_DEVICE PAL_GPUOPEN_OCL) +target_include_directories(rocclr PUBLIC ${AMD_UGL_INCLUDE_DIRS}) +target_link_libraries(rocclr PUBLIC pal amdhsaloader) + +# support for OGL/D3D interop +if(WIN32) + target_link_libraries(rocclr PUBLIC opengl32.lib dxguid.lib) +endif() diff --git a/cmake/modules/bc2h.cmake b/cmake/modules/bc2h.cmake deleted file mode 100644 index 4a7664539..000000000 --- a/cmake/modules/bc2h.cmake +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (C) 2017-2021 Advanced Micro Devices, Inc. All Rights Reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -# THE SOFTWARE. 
- -file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/bc2h.c -"#include \n" -"int main(int argc, char **argv){\n" -" FILE *ifp, *ofp;\n" -" int c, i, l;\n" -" if (argc != 4) return 1;\n" -" ifp = fopen(argv[1], \"rb\");\n" -" if (!ifp) return 1;\n" -" i = fseek(ifp, 0, SEEK_END);\n" -" if (i < 0) return 1;\n" -" l = ftell(ifp);\n" -" if (l < 0) return 1;\n" -" i = fseek(ifp, 0, SEEK_SET);\n" -" if (i < 0) return 1;\n" -" ofp = fopen(argv[2], \"wb+\");\n" -" if (!ofp) return 1;\n" -" fprintf(ofp, \"#define %s_size %d\\n\\n\"\n" -" \"#if defined __GNUC__\\n\"\n" -" \"__attribute__((aligned (4096)))\\n\"\n" -" \"#elif defined _MSC_VER\\n\"\n" -" \"__declspec(align(4096))\\n\"\n" -" \"#endif\\n\"\n" -" \"static const unsigned char %s[%s_size+1] = {\",\n" -" argv[3], l,\n" -" argv[3], argv[3]);\n" -" i = 0;\n" -" while ((c = getc(ifp)) != EOF) {\n" -" if (0 == (i&7)) fprintf(ofp, \"\\n \");\n" -" fprintf(ofp, \" 0x%02x,\", c);\n" -" ++i;\n" -" }\n" -" fprintf(ofp, \" 0x00\\n};\\n\\n\");\n" -" fclose(ifp);\n" -" fclose(ofp);\n" -" return 0;\n" -"}\n" -) - -add_executable(bc2h ${CMAKE_CURRENT_BINARY_DIR}/bc2h.c) diff --git a/compiler/lib/backends/common/library.hpp b/compiler/lib/backends/common/library.hpp index e2d94ecf5..2518dc5c1 100644 --- a/compiler/lib/backends/common/library.hpp +++ b/compiler/lib/backends/common/library.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/compiler/lib/include/acl.h b/compiler/lib/include/acl.h index d95518e01..30e31090d 100644 --- a/compiler/lib/include/acl.h +++ b/compiler/lib/include/acl.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2012-present Advanced Micro Devices, Inc. +/* Copyright (c) 2012 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/compiler/lib/include/aclDefs.h b/compiler/lib/include/aclDefs.h index a0a447160..eac617c50 100644 --- a/compiler/lib/include/aclDefs.h +++ b/compiler/lib/include/aclDefs.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2011-present Advanced Micro Devices, Inc. +/* Copyright (c) 2011 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/compiler/lib/include/aclEnums.h b/compiler/lib/include/aclEnums.h index 106415b45..c4aabf030 100644 --- a/compiler/lib/include/aclEnums.h +++ b/compiler/lib/include/aclEnums.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2012-present Advanced Micro Devices, Inc. +/* Copyright (c) 2012 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/compiler/lib/include/aclFunctors.h b/compiler/lib/include/aclFunctors.h index ab8a862c1..721fd6376 100644 --- a/compiler/lib/include/aclFunctors.h +++ b/compiler/lib/include/aclFunctors.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2012-present Advanced Micro Devices, Inc. +/* Copyright (c) 2012 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/compiler/lib/include/aclStructs.h b/compiler/lib/include/aclStructs.h index e38bdee32..2bfc6afb8 100644 --- a/compiler/lib/include/aclStructs.h +++ b/compiler/lib/include/aclStructs.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2012-present Advanced Micro Devices, Inc. +/* Copyright (c) 2012 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/compiler/lib/include/aclTypes.h b/compiler/lib/include/aclTypes.h index f2803b9b2..433829710 100644 --- a/compiler/lib/include/aclTypes.h +++ b/compiler/lib/include/aclTypes.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2012-present Advanced Micro Devices, Inc. +/* Copyright (c) 2012 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/compiler/lib/spirv/spirvUtils.h b/compiler/lib/spirv/spirvUtils.h index e5d701b63..0a4a562c4 100644 --- a/compiler/lib/spirv/spirvUtils.h +++ b/compiler/lib/spirv/spirvUtils.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/compiler/lib/utils/OPTIONS.def b/compiler/lib/utils/OPTIONS.def index 46fbbaae2..005465c58 100644 --- a/compiler/lib/utils/OPTIONS.def +++ b/compiler/lib/utils/OPTIONS.def @@ -1,4 +1,4 @@ -/* Copyright (c) 2010-present Advanced Micro Devices, Inc. +/* Copyright (c) 2010 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/compiler/lib/utils/bif_section_labels.hpp b/compiler/lib/utils/bif_section_labels.hpp index a5322e94f..873c73032 100644 --- a/compiler/lib/utils/bif_section_labels.hpp +++ b/compiler/lib/utils/bif_section_labels.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2012-present Advanced Micro Devices, Inc. +/* Copyright (c) 2012 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/compiler/lib/utils/libUtils.h b/compiler/lib/utils/libUtils.h index aa383f396..9e4e47b0b 100644 --- a/compiler/lib/utils/libUtils.h +++ b/compiler/lib/utils/libUtils.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2011-present Advanced Micro Devices, Inc. +/* Copyright (c) 2011 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/compiler/lib/utils/options.cpp b/compiler/lib/utils/options.cpp index b90bb1aa6..3127338a6 100644 --- a/compiler/lib/utils/options.cpp +++ b/compiler/lib/utils/options.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/compiler/lib/utils/options.hpp b/compiler/lib/utils/options.hpp index 25b15a53a..30442fe87 100644 --- a/compiler/lib/utils/options.hpp +++ b/compiler/lib/utils/options.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2009-present Advanced Micro Devices, Inc. +/* Copyright (c) 2009 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/appprofile.cpp b/device/appprofile.cpp index 4e7930ec5..23255ecde 100644 --- a/device/appprofile.cpp +++ b/device/appprofile.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2014-present Advanced Micro Devices, Inc. +/* Copyright (c) 2014 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -22,133 +22,19 @@ #include "os/os.hpp" #include "utils/flags.hpp" #include "appprofile.hpp" -#if !defined(WITH_LIGHTNING_COMPILER) -#include "adl.h" -#endif // !defined(WITH_LIGHTNING_COMPILER) #include #include -#if defined(WITH_LIGHTNING_COMPILER) typedef void* ADLApplicationProfile; int SearchProfileOfAnApplication(const wchar_t* fileName, ADLApplicationProfile** lppProfile) { return 0; } -#define __stdcall -#endif // defined(WITH_LIGHTNING_COMPILER) - -#ifdef BRAHMA -extern int SearchProfileOfAnApplication(const wchar_t* fileName, - ADLApplicationProfile** lppProfile); -#endif // BRAHMA #define GETPROCADDRESS(_adltype_, _adlfunc_) (_adltype_) amd::Os::getSymbol(adlHandle_, #_adlfunc_); namespace amd { -#if !defined(BRAHMA) && !defined(WITH_LIGHTNING_COMPILER) - -static void* __stdcall adlMallocCallback(int n) { return malloc(n); } - -class ADL { - public: - ADL(); - ~ADL(); - - bool init(); - - void* adlHandle() const { return adlHandle_; }; - ADL_CONTEXT_HANDLE adlContext() const { return adlContext_; } - - typedef int (*Adl2MainControlCreate)(ADL_MAIN_MALLOC_CALLBACK callback, - int iEnumConnectedAdapters, ADL_CONTEXT_HANDLE* context); - typedef int (*Adl2MainControlDestroy)(ADL_CONTEXT_HANDLE context); - typedef int (*Adl2ConsoleModeFileDescriptorSet)(ADL_CONTEXT_HANDLE context, int fileDescriptor); - typedef int (*Adl2MainControlRefresh)(ADL_CONTEXT_HANDLE context); - typedef int (*Adl2ApplicationProfilesSystemReload)(ADL_CONTEXT_HANDLE context); - typedef int (*Adl2ApplicationProfilesProfileOfApplicationx2Search)( - ADL_CONTEXT_HANDLE context, const wchar_t* fileName, const wchar_t* path, - const wchar_t* version, const wchar_t* appProfileArea, ADLApplicationProfile** lppProfile); - - Adl2MainControlCreate adl2MainControlCreate; - Adl2MainControlDestroy adl2MainControlDestroy; - 
Adl2ConsoleModeFileDescriptorSet adl2ConsoleModeFileDescriptorSet; - Adl2MainControlRefresh adl2MainControlRefresh; - Adl2ApplicationProfilesSystemReload adl2ApplicationProfilesSystemReload; - Adl2ApplicationProfilesProfileOfApplicationx2Search - adl2ApplicationProfilesProfileOfApplicationx2Search; - - private: - void* adlHandle_; - ADL_CONTEXT_HANDLE adlContext_; -}; - -ADL::ADL() : adlHandle_(NULL), adlContext_(NULL) { - adl2MainControlCreate = NULL; - adl2MainControlDestroy = NULL; - adl2ConsoleModeFileDescriptorSet = NULL; - adl2MainControlRefresh = NULL; - adl2ApplicationProfilesSystemReload = NULL; - adl2ApplicationProfilesProfileOfApplicationx2Search = NULL; -} - -ADL::~ADL() { - if (adl2MainControlDestroy != NULL) { - adl2MainControlDestroy(adlContext_); - } - adlContext_ = NULL; -} - -bool ADL::init() { - if (!adlHandle_) { - adlHandle_ = amd::Os::loadLibrary("atiadl" LP64_SWITCH(LINUX_SWITCH("xx", "xy"), "xx")); - } - - if (!adlHandle_) { - return false; - } - - adl2MainControlCreate = GETPROCADDRESS(Adl2MainControlCreate, ADL2_Main_Control_Create); - adl2MainControlDestroy = GETPROCADDRESS(Adl2MainControlDestroy, ADL2_Main_Control_Destroy); - adl2ConsoleModeFileDescriptorSet = - GETPROCADDRESS(Adl2ConsoleModeFileDescriptorSet, ADL2_ConsoleMode_FileDescriptor_Set); - adl2MainControlRefresh = GETPROCADDRESS(Adl2MainControlRefresh, ADL2_Main_Control_Refresh); - adl2ApplicationProfilesSystemReload = - GETPROCADDRESS(Adl2ApplicationProfilesSystemReload, ADL2_ApplicationProfiles_System_Reload); - adl2ApplicationProfilesProfileOfApplicationx2Search = - GETPROCADDRESS(Adl2ApplicationProfilesProfileOfApplicationx2Search, - ADL2_ApplicationProfiles_ProfileOfAnApplicationX2_Search); - - if (adl2MainControlCreate == NULL || adl2MainControlDestroy == NULL || - adl2MainControlRefresh == NULL || adl2ApplicationProfilesSystemReload == NULL || - adl2ApplicationProfilesProfileOfApplicationx2Search == NULL) { - return false; - } - - int result = 
adl2MainControlCreate(adlMallocCallback, 1, &adlContext_); - if (result != ADL_OK) { - // ADL2 is expected to return ADL_ERR_NO_XDISPLAY in Linux Console mode environment - if (result == ADL_ERR_NO_XDISPLAY) { - if (adl2ConsoleModeFileDescriptorSet == NULL || - adl2ConsoleModeFileDescriptorSet(adlContext_, ADL_UNSET) != ADL_OK) { - return false; - } - adl2MainControlRefresh(adlContext_); - } else { - return false; - } - } - - // Reload is disabled in ADL with the change list 1198904 and ticket - // SWDEV-59442 - The ADL_ApplicationProfiles_System_Reload Function is not Re-entrant - // Returned value is ADL_ERR_NOT_SUPPORTED on Windows. - adl2ApplicationProfilesSystemReload(adlContext_); - - return true; -} - -#endif // BRAHMA - AppProfile::AppProfile() : gpuvmHighAddr_(false), profileOverridesAllSettings_(false) { amd::Os::getAppPathAndFileName(appFileName_, appPathAndFileName_); propertyDataMap_.insert( @@ -193,75 +79,14 @@ bool AppProfile::init() { bool AppProfile::ParseApplicationProfile() { ADLApplicationProfile* pProfile = NULL; -#if !defined(BRAHMA) && !defined(WITH_LIGHTNING_COMPILER) - amd::ADL* adl = new amd::ADL; - - if ((adl == NULL) || !adl->init()) { - delete adl; - return false; - } - - // Apply blb configurations - int result = adl->adl2ApplicationProfilesProfileOfApplicationx2Search( - adl->adlContext(), wsAppFileName_.c_str(), NULL, NULL, L"OCL", &pProfile); - - delete adl; - -#else // BRAHMA - if (!SearchProfileOfAnApplication(wsAppFileName_.c_str(), &pProfile)) { return false; } -#endif // BRAHMA - if (pProfile == NULL) { return false; } -#if !defined(WITH_LIGHTNING_COMPILER) - PropertyRecord* firstProperty = pProfile->record; - uint32_t valueOffset = 0; - const int BUFSIZE = 1024; - wchar_t wbuffer[BUFSIZE]; - char buffer[2 * BUFSIZE]; - - for (int index = 0; index < pProfile->iCount; index++) { - PropertyRecord* profileProperty = - reinterpret_cast((reinterpret_cast(firstProperty)) + valueOffset); - - // Get property name - char* propertyName = 
profileProperty->strName; - auto entry = propertyDataMap_.find(std::string(propertyName)); - if (entry == propertyDataMap_.end()) { - // unexpected name - valueOffset += (sizeof(PropertyRecord) + profileProperty->iDataSize - 4); - continue; - } - - // Get the property value - switch (entry->second.type_) { - case DataType_Boolean: - *(reinterpret_cast(entry->second.data_)) = profileProperty->uData[0] ? true : false; - break; - case DataType_String: { - assert((size_t)(profileProperty->iDataSize) < sizeof(wbuffer) - 2 && - "app profile string too long"); - memset(wbuffer, 0, sizeof(wbuffer)); - memcpy(wbuffer, profileProperty->uData, profileProperty->iDataSize); - size_t len = wcstombs(buffer, wbuffer, sizeof(buffer)); - assert(len < sizeof(buffer) - 1 && "app profile string too long"); - *(reinterpret_cast(entry->second.data_)) = buffer; - break; - } - default: - break; - } - valueOffset += (sizeof(PropertyRecord) + profileProperty->iDataSize - 4); - } - - free(pProfile); -#endif // !defined(WITH_LIGHTNING_COMPILER) return true; } } diff --git a/device/appprofile.hpp b/device/appprofile.hpp index ca82ea90a..f44535952 100644 --- a/device/appprofile.hpp +++ b/device/appprofile.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2014-present Advanced Micro Devices, Inc. +/* Copyright (c) 2014 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/blit.cpp b/device/blit.cpp index 15eeb8902..f2cbd0763 100644 --- a/device/blit.cpp +++ b/device/blit.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2010-present Advanced Micro Devices, Inc. +/* Copyright (c) 2010 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/blit.hpp b/device/blit.hpp index b87f3d854..8ffd6d85c 100644 --- a/device/blit.hpp +++ b/device/blit.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2010-present Advanced Micro Devices, Inc. +/* Copyright (c) 2010 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/blitcl.cpp b/device/blitcl.cpp index c328c30c3..1520a4fb5 100644 --- a/device/blitcl.cpp +++ b/device/blitcl.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2010-present Advanced Micro Devices, Inc. +/* Copyright (c) 2010 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/comgrctx.cpp b/device/comgrctx.cpp index de195357c..5a2406738 100644 --- a/device/comgrctx.cpp +++ b/device/comgrctx.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/comgrctx.hpp b/device/comgrctx.hpp index 3399b4eeb..cd74917c2 100644 --- a/device/comgrctx.hpp +++ b/device/comgrctx.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/devhcmessages.cpp b/device/devhcmessages.cpp index 7b57c0a2f..c8b9c0df2 100644 --- a/device/devhcmessages.cpp +++ b/device/devhcmessages.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2020-present Advanced Micro Devices, Inc. +/* Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/devhcmessages.hpp b/device/devhcmessages.hpp index 7f156decd..fc009c613 100644 --- a/device/devhcmessages.hpp +++ b/device/devhcmessages.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2020-present Advanced Micro Devices, Inc. +/* Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -70,7 +70,12 @@ enum ServiceID { SERVICE_RESERVED = 0, SERVICE_FUNCTION_CALL = 1, SERVICE_PRINTF = 2, - SERVICE_DEVMEM = 3, + SERVICE_DEVMEM = 3 + #if defined(__clang__) + #if __has_feature(address_sanitizer) + , SERVICE_SANITIZER = 4 + #endif + #endif }; struct Message; diff --git a/device/devhcprintf.cpp b/device/devhcprintf.cpp index ea5adcbe8..40078db37 100644 --- a/device/devhcprintf.cpp +++ b/device/devhcprintf.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2020-present Advanced Micro Devices, Inc. +/* Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/devhostcall.cpp b/device/devhostcall.cpp index a1bb1b6f7..c49ecc012 100644 --- a/device/devhostcall.cpp +++ b/device/devhostcall.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2019-present Advanced Micro Devices, Inc. +/* Copyright (c) 2019 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -36,91 +36,11 @@ #include #include -namespace { // anonymous - -enum SignalValue { SIGNAL_DONE = 0, SIGNAL_INIT = 1 }; - -/** \brief Packet payload - * - * Contains 64 slots of 8 ulongs each, one for each workitem in the - * wave. A slot with index \c i contains valid data if the - * corresponding bit in PacketHeader::activemask is set. - */ -struct Payload { - uint64_t slots[64][8]; -}; - -/** Packet header */ -struct PacketHeader { - /** Tagged pointer to the next packet in an intrusive stack */ - uint64_t next_; - /** Bitmask that represents payload slots with valid data */ - uint64_t activemask_; - /** Service ID requested by the wave */ - uint32_t service_; - /** Control bits. - * \li 0: \c READY flag. Indicates packet awaiting a host response. - */ - std::atomic control_; -}; - -static_assert(std::is_standard_layout::value, - "the hostcall packet must be useable from other languages"); - -/** Field offsets in the packet control field */ -enum ControlOffset { - CONTROL_OFFSET_READY_FLAG = 0, - CONTROL_OFFSET_RESERVED0 = 1, -}; - -/** Field widths in the packet control field */ -enum ControlWidth { - CONTROL_WIDTH_READY_FLAG = 1, - CONTROL_WIDTH_RESERVED0 = 31, -}; - -/** \brief Shared buffer submitting hostcall requests. - * - * Holds hostcall packets requested by all kernels executing on the - * same device queue. 
Each hostcall buffer is associated with at most - * one device queue. - * - * Packets in the buffer are accessed using 64-bit tagged pointers to mitigate - * the ABA problem in lock-free stacks. The index_mask is used to extract the - * lower bits of the pointer, which form the index into the packet array. The - * remaining higher bits define a tag that is incremented on every pop from a - * stack. - */ -class HostcallBuffer { - /** Array of packet headers */ - PacketHeader* headers_; - /** Array of packet payloads */ - Payload* payloads_; - /** Signal used by kernels to indicate new work */ - void* doorbell_; - /** Stack of free packets. Uses tagged pointers. */ - uint64_t free_stack_; - /** Stack of ready packets. Uses tagged pointers */ - std::atomic ready_stack_; - /** Mask for accessing the packet index in the tagged pointer. */ - uint64_t index_mask_; - /** Some services need a device */ - const amd::Device* device_; - - PacketHeader* getHeader(uint64_t ptr) const; - Payload* getPayload(uint64_t ptr) const; - - public: - void processPackets(MessageHandler& messages); - void initialize(uint32_t num_packets); - void setDoorbell(void* doorbell) { doorbell_ = doorbell; }; - void setDevice(const amd::Device* dptr) { device_ = dptr; } -}; - -static_assert(std::is_standard_layout::value, - "the hostcall buffer must be useable from other languages"); - -}; // namespace +#if defined(__clang__) +#if __has_feature(address_sanitizer) +#include "device/devsanitizer.hpp" +#endif +#endif PacketHeader* HostcallBuffer::getHeader(uint64_t ptr) const { return headers_ + (ptr & index_mask_); @@ -205,6 +125,7 @@ void HostcallBuffer::processPackets(MessageHandler& messages) { // Grab the entire ready stack and set the top to 0. New requests from the // device will continue pushing on the stack while we process the packets that // we have grabbed. 
+ uint64_t ready_stack = std::atomic_exchange_explicit(&ready_stack_, static_cast(0), std::memory_order_acquire); if (!ready_stack) { return; @@ -222,6 +143,16 @@ void HostcallBuffer::processPackets(MessageHandler& messages) { auto service = header->service_; auto payload = getPayload(iter); auto activemask = header->activemask_; + +#if defined(__clang__) +#if __has_feature(address_sanitizer) + if (service == SERVICE_SANITIZER) { + handleSanitizerService(payload, activemask, device_, uri_locator); + //activemask zeroed to avoid subsequent handling for each work-item. + activemask = 0; + } +#endif +#endif while (activemask) { auto wi = amd::leastBitSet(activemask); activemask ^= static_cast(1) << wi; @@ -290,7 +221,11 @@ class HostcallListener { std::set buffers_; device::Signal* doorbell_; MessageHandler messages_; - +#if defined(__clang__) +#if __has_feature(address_sanitizer) + device::UriLocator* urilocator = nullptr; +#endif +#endif class Thread : public amd::Thread { public: Thread() : amd::Thread("Hostcall Listener Thread", CQ_THREAD_STACK_SIZE) {} @@ -338,7 +273,6 @@ amd::Monitor listenerLock("Hostcall listener lock"); void HostcallListener::consumePackets() { uint64_t timeout = 1024 * 1024; uint64_t signal_value = SIGNAL_INIT; - while (true) { while (true) { uint64_t new_value = doorbell_->Wait(signal_value, device::Signal::Condition::Ne, timeout); @@ -349,7 +283,6 @@ void HostcallListener::consumePackets() { } if (signal_value == SIGNAL_DONE) { - ClPrint(amd::LOG_INFO, amd::LOG_INIT, "Hostcall listener received SIGNAL_DONE"); return; } @@ -375,12 +308,23 @@ void HostcallListener::terminate() { amd::Os::yield(); } +#if defined(__clang__) +#if __has_feature(address_sanitizer) + if (urilocator) + delete urilocator; +#endif +#endif delete doorbell_; } void HostcallListener::addBuffer(HostcallBuffer* buffer) { assert(buffers_.count(buffer) == 0 && "buffer already present"); buffer->setDoorbell(doorbell_->getHandle()); +#if defined(__clang__) +#if 
__has_feature(address_sanitizer) + buffer->setUriLocator(urilocator); +#endif +#endif buffers_.insert(buffer); } @@ -391,19 +335,30 @@ void HostcallListener::removeBuffer(HostcallBuffer* buffer) { bool HostcallListener::initialize(const amd::Device &dev) { doorbell_ = dev.createSignal(); -#ifdef WITH_HSA_DEVICE - auto ws = device::Signal::WaitState::Blocked; -#else +#if defined(WITH_PAL_DEVICE) && !defined(_WIN32) auto ws = device::Signal::WaitState::Active; +#else + auto ws = device::Signal::WaitState::Blocked; #endif if ((doorbell_ == nullptr) || !doorbell_->Init(dev, SIGNAL_INIT, ws)) { return false; } +#if defined(__clang__) +#if __has_feature(address_sanitizer) + urilocator = dev.createUriLocator(); +#endif +#endif // If the listener thread was not successfully initialized, clean // everything up and bail out. if (thread_.state() < Thread::INITIALIZED) { delete doorbell_; +#if defined(__clang__) +#if __has_feature(address_sanitizer) + if (urilocator) + delete urilocator; +#endif +#endif return false; } diff --git a/device/devhostcall.hpp b/device/devhostcall.hpp index 1a3f275a9..f529c1020 100644 --- a/device/devhostcall.hpp +++ b/device/devhostcall.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2019-present Advanced Micro Devices, Inc. +/* Copyright (c) 2019 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -22,9 +22,14 @@ #include "top.hpp" #include "device/device.hpp" - +#include "device/devhcmessages.hpp" #include +#if defined(__clang__) +#if __has_feature(address_sanitizer) +#include "device/devurilocator.hpp" +#endif +#endif /** \file Support for invoking host services from the device. 
* * A hostcall is a fixed-size request generated by a kernel running @@ -79,3 +84,94 @@ uint32_t getHostcallBufferAlignment(void); bool enableHostcalls(const amd::Device& dev, void* buffer, uint32_t numPackets); void disableHostcalls(void* buffer); + +enum SignalValue { SIGNAL_DONE = 0, SIGNAL_INIT = 1 }; + +/** \brief Packet payload + * + * Contains 64 slots of 8 ulongs each, one for each workitem in the + * wave. A slot with index \c i contains valid data if the + * corresponding bit in PacketHeader::activemask is set. + */ +struct Payload { + uint64_t slots[64][8]; +}; + +/** Packet header */ +struct PacketHeader { + /** Tagged pointer to the next packet in an intrusive stack */ + uint64_t next_; + /** Bitmask that represents payload slots with valid data */ + uint64_t activemask_; + /** Service ID requested by the wave */ + uint32_t service_; + /** Control bits. + * \li 0: \c READY flag. Indicates packet awaiting a host response. + */ + std::atomic control_; +}; + +static_assert(std::is_standard_layout::value, + "the hostcall packet must be useable from other languages"); + +/** Field offsets in the packet control field */ +enum ControlOffset { + CONTROL_OFFSET_READY_FLAG = 0, + CONTROL_OFFSET_RESERVED0 = 1, +}; + +/** Field widths in the packet control field */ +enum ControlWidth { + CONTROL_WIDTH_READY_FLAG = 1, + CONTROL_WIDTH_RESERVED0 = 31, +}; + +/** \brief Shared buffer submitting hostcall requests. + * + * Holds hostcall packets requested by all kernels executing on the + * same device queue. Each hostcall buffer is associated with at most + * one device queue. + * + * Packets in the buffer are accessed using 64-bit tagged pointers to mitigate + * the ABA problem in lock-free stacks. The index_mask is used to extract the + * lower bits of the pointer, which form the index into the packet array. The + * remaining higher bits define a tag that is incremented on every pop from a + * stack. 
+ */ +class HostcallBuffer { + /** Array of packet headers */ + PacketHeader* headers_; + /** Array of packet payloads */ + Payload* payloads_; + /** Signal used by kernels to indicate new work */ + void* doorbell_; + /** Stack of free packets. Uses tagged pointers. */ + uint64_t free_stack_; + /** Stack of ready packets. Uses tagged pointers */ + std::atomic ready_stack_; + /** Mask for accessing the packet index in the tagged pointer. */ + uint64_t index_mask_; + /** Some services need a device**/ + const amd::Device* device_; + + PacketHeader* getHeader(uint64_t ptr) const; + Payload* getPayload(uint64_t ptr) const; + + public: + void processPackets(MessageHandler& messages); + void initialize(uint32_t num_packets); + void setDoorbell(void* doorbell) { doorbell_ = doorbell; }; + void setDevice(const amd::Device* dptr) { device_ = dptr; }; + + #if defined(__clang__) + #if __has_feature(address_sanitizer) + private: + device::UriLocator* uri_locator; + public: + void setUriLocator(device::UriLocator* uri_l) { uri_locator = uri_l; }; + #endif + #endif +}; + +static_assert(std::is_standard_layout::value, + "the hostcall buffer must be useable from other languages"); diff --git a/device/device.cpp b/device/device.cpp index 82d55b44e..150a7b540 100644 --- a/device/device.cpp +++ b/device/device.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -158,9 +158,9 @@ std::pair Isa::supportedIsas() { {"gfx908:sramecc-:xnack+", nullptr, true, false, false, 9, 0, 8, OFF, ON, 4, 16, 1, 256, 64 * Ki, 32}, {"gfx908:sramecc+:xnack-", nullptr, true, false, false, 9, 0, 8, ON, OFF, 4, 16, 1, 256, 64 * Ki, 32}, {"gfx908:sramecc+:xnack+", nullptr, true, false, false, 9, 0, 8, ON, ON, 4, 16, 1, 256, 64 * Ki, 32}, - {"gfx909", nullptr, false, false, false, 9, 0, 9, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32}, // Also Raven2 (can execute Raven code) - {"gfx909:xnack-", nullptr, false, false, false, 9, 0, 9, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32}, - {"gfx909:xnack+", nullptr, false, false, false, 9, 0, 9, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32}, + {"gfx902", "gfx903", false, true, false, 9, 0, 2, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32}, // Also Raven2 (can execute Raven code) + {"gfx902:xnack-", "gfx902", false, true, false, 9, 0, 2, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32}, + {"gfx902:xnack+", "gfx902", false, true, false, 9, 0, 2, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32}, {"gfx90a", nullptr, true, false, false, 9, 0, 10, ANY, ANY, 4, 16, 1, 256, 64 * Ki, 32}, {"gfx90a:sramecc-", nullptr, true, false, false, 9, 0, 10, OFF, ANY, 4, 16, 1, 256, 64 * Ki, 32}, {"gfx90a:sramecc+", nullptr, true, false, false, 9, 0, 10, ON, ANY, 4, 16, 1, 256, 64 * Ki, 32}, @@ -182,9 +182,15 @@ std::pair Isa::supportedIsas() { {"gfx1012", "gfx1012", true, true, false, 10, 1, 2, NONE, ANY, 2, 32, 1, 256, 64 * Ki, 32}, {"gfx1012:xnack-", "gfx1012", true, true, false, 10, 1, 2, NONE, OFF, 2, 32, 1, 256, 64 * Ki, 32}, {"gfx1012:xnack+", nullptr, true, true, false, 10, 1, 2, NONE, ON, 2, 32, 1, 256, 64 * Ki, 32}, + {"gfx1013", "gfx1013", true, false, false, 10, 1, 3, NONE, ANY, 2, 32, 1, 256, 64 * Ki, 32}, + {"gfx1013:xnack-", "gfx1013", true, false, false, 10, 1, 3, NONE, OFF, 2, 32, 1, 256, 64 * Ki, 32}, + 
{"gfx1013:xnack+", nullptr, true, false, false, 10, 1, 3, NONE, ON, 2, 32, 1, 256, 64 * Ki, 32}, {"gfx1030", "gfx1030", true, true, false, 10, 3, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32}, {"gfx1031", "gfx1031", true, true, false, 10, 3, 1, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32}, {"gfx1032", "gfx1032", true, true, false, 10, 3, 2, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32}, + {"gfx1033", "gfx1033", true, false, false, 10, 3, 3, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32}, + {"gfx1034", "gfx1034", true, true, false, 10, 3, 4, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32}, + {"gfx1035", "gfx1035", true, false, false, 10, 3, 5, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32}, }; return std::make_pair(std::begin(supportedIsas_), std::end(supportedIsas_)); } @@ -327,6 +333,22 @@ void MemObjMap::UpdateAccess(amd::Device *peerDev) { } } +void MemObjMap::Purge(amd::Device* dev) { + assert(dev != nullptr); + + amd::ScopedLock lock(AllocatedLock_); + for (auto it = MemObjMap_.cbegin(); it != MemObjMap_.cend(); ) { + amd::Memory* memObj = it->second; + unsigned int flags = memObj->getMemFlags(); + const std::vector& devices = memObj->getContext().devices(); + if (devices.size() == 1 && devices[0] == dev && !(flags & ROCCLR_MEM_INTERNAL_MEMORY)) { + it = MemObjMap_.erase(it); + } else { + ++it; + } + } +} + Device::BlitProgram::~BlitProgram() { if (program_ != nullptr) { program_->release(); @@ -390,7 +412,8 @@ bool Device::init() { // If returned false, error initializing HSA stack. // If returned true, either HSA not installed or HSA stack // successfully initialized. - if (!roc::Device::init()) { + ret = roc::Device::init(); + if (!ret) { // abort() commentted because this is the only indication // that KFD is not installed. // Ignore the failure and assume KFD is not installed. diff --git a/device/device.hpp b/device/device.hpp index a41dbbee1..845b65df2 100644 --- a/device/device.hpp +++ b/device/device.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. 
+/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -40,6 +40,12 @@ #include "hwdebug.hpp" #include "devsignal.hpp" +#if defined(__clang__) +#if __has_feature(address_sanitizer) +#include "devurilocator.hpp" +#endif +#endif + #include #include #include @@ -49,6 +55,7 @@ #include #include #include +#include #include namespace amd { @@ -97,10 +104,11 @@ enum MemoryAdvice : uint32_t { UnsetReadMostly = 2, ///< Undo the effect of hipMemAdviseSetReadMostly SetPreferredLocation = 3, ///< Set the preferred location for the data as the specified device UnsetPreferredLocation = 4, ///< Clear the preferred location for the data - SetAccessedBy = 5, ///< Data will be accessed by the specified device, - ///< so prevent page faults as much as possible - UnsetAccessedBy = 6 ///< Let the Unified Memory subsystem decide on - ///< the page faulting policy for the specified device + SetAccessedBy = 5, ///< Data will be accessed by the specified device, reducing + ///< the amount of page faults + UnsetAccessedBy = 6, ///< HMM decides on the page faulting policy for the specified device + SetCoarseGrain = 100, ///< Change cache policy to improve performance (disables coherency) + UnsetCoarseGrain = 101 ///< Restore coherent cache policy at the cost of some performance }; enum MemRangeAttribute : uint32_t { @@ -833,7 +841,8 @@ class Memory : public amd::HeapObject { SubMemoryObject = 0x00000008, //!< Memory is sub-memory HostMemoryRegistered = 0x00000010, //!< Host memory was registered MemoryCpuUncached = 0x00000020, //!< Memory is uncached on CPU access(slow read) - AllowedPeerAccess = 0x00000040 //!< Memory can be accessed from peer + AllowedPeerAccess = 0x00000040, //!< Memory can be accessed from peer + PersistentMap = 0x00000080 //!< Map Peristent memory }; uint flags_; //!< Memory object flags @@ -1237,6 +1246,7 @@ 
class MemObjMap : public AllStatic { static amd::Memory* FindMemObj( const void* k); //!< find the mem object based on the input pointer static void UpdateAccess(amd::Device *peerDev); + static void Purge(amd::Device* dev); //!< Purge all user allocated memories on the given device private: static std::map MemObjMap_; //!< the mem object<->hostptr information container @@ -1695,6 +1705,16 @@ class Device : public RuntimeObject { cl_set_device_clock_mode_output_amd* pSetClockModeOutput) { return true; }; + + // Returns the status of HW event, associated with amd::Event + virtual bool IsHwEventReady( + const amd::Event& event, //!< AMD event for HW status validation + bool wait = false //!< If true then forces the event completion + ) const { + return false; + }; + virtual void ReleaseGlobalSignal(void* signal) const {} + //! Returns TRUE if the device is available for computations bool isOnline() const { return online_; } @@ -1823,9 +1843,13 @@ class Device : public RuntimeObject { void SetActiveWait(bool state) { activeWait_ = state; } virtual amd::Memory* GetArenaMemObj(const void* ptr, size_t& offset) { - ShouldNotReachHere(); return nullptr; } +#if defined(__clang__) +#if __has_feature(address_sanitizer) + virtual device::UriLocator* createUriLocator() const = 0; +#endif +#endif protected: //! Enable the specified extension diff --git a/device/devkernel.cpp b/device/devkernel.cpp index c8079ec9e..2fd4a9ed7 100644 --- a/device/devkernel.cpp +++ b/device/devkernel.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/devkernel.hpp b/device/devkernel.hpp index 155b219a6..7fa0e8ba6 100644 --- a/device/devkernel.hpp +++ b/device/devkernel.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/devprogram.cpp b/device/devprogram.cpp index 4ada87fbd..279691d0f 100644 --- a/device/devprogram.cpp +++ b/device/devprogram.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -713,14 +713,6 @@ bool Program::compileImplLC(const std::string& sourceCode, buildLog_ += "Warning: opening the file to dump the OpenCL source failed.\n"; } } - // Append Options provided by user to driver options - if (isHIP()) { - if (options->origOptionStr.size()) { - std::istringstream userOptions{options->origOptionStr}; - std::copy(std::istream_iterator(userOptions), - std::istream_iterator(), std::back_inserter(driverOptions)); - } - } // Append Options provided by user to driver options if (isHIP()) { diff --git a/device/devprogram.hpp b/device/devprogram.hpp index be6176633..cd2a3c74f 100644 --- a/device/devprogram.hpp +++ b/device/devprogram.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/devsanitizer.hpp b/device/devsanitizer.hpp new file mode 100644 index 000000000..beabe6078 --- /dev/null +++ b/device/devsanitizer.hpp @@ -0,0 +1,110 @@ +/* Copyright (c) 2021 - 2021 Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. */ + +#pragma once +#include "device/devhostcall.hpp" +#include "device/device.hpp" +#include "device/devurilocator.hpp" +#include "utils/debug.hpp" +#include "platform/memory.hpp" + +#include //to exp +#include +#include +#include +#include + +//Address sanitizer runtime entry-function to report the invalid device memory access +//this will be defined in llvm-project/compiler-rt/lib/asan, and will have effect only +//when compiler-rt is build for AMDGPU. +//Note: This API is runtime interface of asan library and only defined for linux os. 
+extern "C" +void __asan_report_nonself_error(uint64_t *callstack, uint32_t n_callstack, uint64_t* addr, + uint32_t naddr, uint64_t* entity_ids, uint32_t n_entities, bool is_write, uint32_t access_size, + bool is_abort, const char* name, int64_t vma_adjust, int fd, + uint64_t file_extent_size, uint64_t file_extent_start = 0); + +namespace { +void handleSanitizerService(Payload* packt_payload, uint64_t activemask, + const amd::Device* gpu_device, device::UriLocator* uri_locator) { + // An address results in invalid access in each active lane + uint64_t device_failing_addresses[64]; + // An array of identifications of entities requesting a report. + // index 0 - contains device id + // index 1,2,3 - contains wg_idx, wg_idy, wg_idz respectively. + // index 4 to 67 - contains reporting wave ids in a wave-front. + uint64_t entity_id[68], callstack[1]; + uint32_t n_activelanes = __builtin_popcountl(activemask); + uint64_t access_info = 0, access_size = 0; + bool is_abort = true; + entity_id[0] = gpu_device->index(); + + assert(packt_payload != nullptr && "packet payload is null?"); + + int indx = 0, en_idx = 1; + bool first_workitem = false; + while (activemask) { + auto wi = amd::leastBitSet(activemask); + activemask ^= static_cast(1) << wi; + auto data_slot = packt_payload->slots[wi]; + //encoding of packet payload arguments is + //defined in device-libs/asanrtl/src/report.cl + if (!first_workitem) { + device_failing_addresses[indx] = data_slot[0]; + callstack[0] = data_slot[1]; + entity_id[en_idx] = data_slot[2]; + entity_id[++en_idx] = data_slot[3]; + entity_id[++en_idx] = data_slot[4]; + entity_id[++en_idx] = data_slot[5]; + access_info = data_slot[6]; + access_size = data_slot[7]; + first_workitem = true; + } + else { + device_failing_addresses[indx] = data_slot[0]; + entity_id[en_idx] = data_slot[5]; + } + indx++; + en_idx++; + } + + bool is_write = false; + if (access_info & 0xFFFFFFFF00000000) + is_abort = false; + if (access_info & 1) + is_write = true; + + 
std::string fileuri; + uint64_t size = 0, offset = 0; + int64_t loadAddrAdjust = 0; + auto uri_fd = amd::Os::FDescInit(); + if (uri_locator) { + device::UriLocator::UriInfo fileuri_info = uri_locator->lookUpUri(callstack[0]); + std::tie(offset, size) = uri_locator->decodeUriAndGetFd(fileuri_info, &uri_fd); + loadAddrAdjust = fileuri_info.loadAddressDiff; + } + +#if defined(__linux__) + __asan_report_nonself_error(callstack, 1, device_failing_addresses, n_activelanes, + entity_id, n_activelanes+4, is_write, access_size, is_abort, + /*thread key*/"amdgpu", loadAddrAdjust, uri_fd, size, offset); +#endif +} +} //end anonymous namespace diff --git a/device/devsignal.hpp b/device/devsignal.hpp index 3ce7b0766..20850c9d2 100644 --- a/device/devsignal.hpp +++ b/device/devsignal.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2021-present Advanced Micro Devices, Inc. +/* Copyright (c) 2021 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/devurilocator.hpp b/device/devurilocator.hpp new file mode 100644 index 000000000..730d7a2ff --- /dev/null +++ b/device/devurilocator.hpp @@ -0,0 +1,43 @@ +/* Copyright (c) 2019 - 2021 Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. */ + +#pragma once +#if defined(__clang__) +#if __has_feature(address_sanitizer) +#include "os/os.hpp" +#include +#include +namespace device{ +// Interface for HSA/PAL Uri Locators +class UriLocator { + public: + struct UriInfo { + std::string uriPath; + int64_t loadAddressDiff; + }; + + virtual ~UriLocator() {} + virtual UriInfo lookUpUri(uint64_t device_pc) = 0; + virtual std::pair decodeUriAndGetFd(UriInfo& uri, + amd::Os::FileDesc* uri_fd) = 0; +}; +} //namespace device +#endif +#endif diff --git a/device/devwavelimiter.cpp b/device/devwavelimiter.cpp index c79d206d6..4180cfdac 100644 --- a/device/devwavelimiter.cpp +++ b/device/devwavelimiter.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/devwavelimiter.hpp b/device/devwavelimiter.hpp index 394c2cef3..f4c29039a 100644 --- a/device/devwavelimiter.hpp +++ b/device/devwavelimiter.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gpuappprofile.cpp b/device/gpu/gpuappprofile.cpp index 233c00783..417d1a1b2 100644 --- a/device/gpu/gpuappprofile.cpp +++ b/device/gpu/gpuappprofile.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2014-present Advanced Micro Devices, Inc. +/* Copyright (c) 2014 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gpuappprofile.hpp b/device/gpu/gpuappprofile.hpp index 72b5a5622..35d3f79ac 100644 --- a/device/gpu/gpuappprofile.hpp +++ b/device/gpu/gpuappprofile.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2014-present Advanced Micro Devices, Inc. +/* Copyright (c) 2014 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gpubinary.cpp b/device/gpu/gpubinary.cpp index 69b89a34d..48a3e2b51 100644 --- a/device/gpu/gpubinary.cpp +++ b/device/gpu/gpubinary.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2009-present Advanced Micro Devices, Inc. +/* Copyright (c) 2009 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gpubinary.hpp b/device/gpu/gpubinary.hpp index 2577ebe40..6a84035bd 100644 --- a/device/gpu/gpubinary.hpp +++ b/device/gpu/gpubinary.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2009-present Advanced Micro Devices, Inc. +/* Copyright (c) 2009 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gpublit.cpp b/device/gpu/gpublit.cpp index 32cd4985d..26bd37e9b 100644 --- a/device/gpu/gpublit.cpp +++ b/device/gpu/gpublit.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2010-present Advanced Micro Devices, Inc. +/* Copyright (c) 2010 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gpublit.hpp b/device/gpu/gpublit.hpp index 913f85097..045883162 100644 --- a/device/gpu/gpublit.hpp +++ b/device/gpu/gpublit.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2010-present Advanced Micro Devices, Inc. +/* Copyright (c) 2010 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gpucompiler.cpp b/device/gpu/gpucompiler.cpp index 3597aef40..540620350 100644 --- a/device/gpu/gpucompiler.cpp +++ b/device/gpu/gpucompiler.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gpuconstbuf.cpp b/device/gpu/gpuconstbuf.cpp index a49808ba6..03039d473 100644 --- a/device/gpu/gpuconstbuf.cpp +++ b/device/gpu/gpuconstbuf.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2010-present Advanced Micro Devices, Inc. +/* Copyright (c) 2010 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gpuconstbuf.hpp b/device/gpu/gpuconstbuf.hpp index c4cbb1340..83a3adad5 100644 --- a/device/gpu/gpuconstbuf.hpp +++ b/device/gpu/gpuconstbuf.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2010-present Advanced Micro Devices, Inc. +/* Copyright (c) 2010 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gpucounters.cpp b/device/gpu/gpucounters.cpp index 77db45a1e..95e77dc2e 100644 --- a/device/gpu/gpucounters.cpp +++ b/device/gpu/gpucounters.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gpucounters.hpp b/device/gpu/gpucounters.hpp index 8cb50ceac..f0500008f 100644 --- a/device/gpu/gpucounters.hpp +++ b/device/gpu/gpucounters.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2009-present Advanced Micro Devices, Inc. +/* Copyright (c) 2009 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gpudebugger.hpp b/device/gpu/gpudebugger.hpp index e0a28494b..e37546355 100644 --- a/device/gpu/gpudebugger.hpp +++ b/device/gpu/gpudebugger.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2014-present Advanced Micro Devices, Inc. +/* Copyright (c) 2014 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gpudebugmanager.cpp b/device/gpu/gpudebugmanager.cpp index 1a84353bc..48ea263e7 100644 --- a/device/gpu/gpudebugmanager.cpp +++ b/device/gpu/gpudebugmanager.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2014-present Advanced Micro Devices, Inc. +/* Copyright (c) 2014 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gpudebugmanager.hpp b/device/gpu/gpudebugmanager.hpp index 61bdda86a..18e6ad336 100644 --- a/device/gpu/gpudebugmanager.hpp +++ b/device/gpu/gpudebugmanager.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2014-present Advanced Micro Devices, Inc. +/* Copyright (c) 2014 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gpudefs.hpp b/device/gpu/gpudefs.hpp index d5e4f7dbc..6bf8f4a97 100644 --- a/device/gpu/gpudefs.hpp +++ b/device/gpu/gpudefs.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2009-present Advanced Micro Devices, Inc. +/* Copyright (c) 2009 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gpudevice.cpp b/device/gpu/gpudevice.cpp index 3eb7b7147..69694207a 100644 --- a/device/gpu/gpudevice.cpp +++ b/device/gpu/gpudevice.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gpudevice.hpp b/device/gpu/gpudevice.hpp index 35a83c768..5f15d8814 100644 --- a/device/gpu/gpudevice.hpp +++ b/device/gpu/gpudevice.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2009-present Advanced Micro Devices, Inc. +/* Copyright (c) 2009 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -42,6 +42,12 @@ #include "hsailctx.hpp" #include "vaminterface.h" +#if defined(__clang__) +#if __has_feature(address_sanitizer) +#include "device/devurilocator.hpp" +#endif +#endif + /*! \addtogroup GPU * @{ */ @@ -140,6 +146,13 @@ class NullDevice : public amd::Device { virtual bool SetClockMode(const cl_set_device_clock_mode_input_amd setClockModeInput, cl_set_device_clock_mode_output_amd* pSetClockModeOutput) { return true; } +#if defined(__clang__) +#if __has_feature(address_sanitizer) + virtual device::UriLocator* createUriLocator() const { + return nullptr; + } +#endif +#endif protected: //! Answer the question: "Should HSAIL Program be created?", //! based on the given options. @@ -548,6 +561,13 @@ class Device : public NullDevice, public CALGSLDevice { //! Initial the Hardware Debug Manager int32_t hwDebugManagerInit(amd::Context* context, uintptr_t messageStorage); +#if defined(__clang__) +#if __has_feature(address_sanitizer) + virtual device::UriLocator* createUriLocator() const { + return nullptr; + } +#endif +#endif private: //! Disable copy constructor Device(const Device&); diff --git a/device/gpu/gpukernel.cpp b/device/gpu/gpukernel.cpp index 529ea5414..33d3da97f 100644 --- a/device/gpu/gpukernel.cpp +++ b/device/gpu/gpukernel.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. 
+/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gpukernel.hpp b/device/gpu/gpukernel.hpp index 20353441e..f637f9f87 100644 --- a/device/gpu/gpukernel.hpp +++ b/device/gpu/gpukernel.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gpumemory.cpp b/device/gpu/gpumemory.cpp index c45da0bf1..185ded8ab 100644 --- a/device/gpu/gpumemory.cpp +++ b/device/gpu/gpumemory.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gpumemory.hpp b/device/gpu/gpumemory.hpp index 9354900d9..adf7f4f3a 100644 --- a/device/gpu/gpumemory.hpp +++ b/device/gpu/gpumemory.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2009-present Advanced Micro Devices, Inc. +/* Copyright (c) 2009 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gpuprintf.cpp b/device/gpu/gpuprintf.cpp index 157d0c799..e049ecb38 100644 --- a/device/gpu/gpuprintf.cpp +++ b/device/gpu/gpuprintf.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2010-present Advanced Micro Devices, Inc. +/* Copyright (c) 2010 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gpuprintf.hpp b/device/gpu/gpuprintf.hpp index bbab1bf1b..2ef52079f 100644 --- a/device/gpu/gpuprintf.hpp +++ b/device/gpu/gpuprintf.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2010-present Advanced Micro Devices, Inc. +/* Copyright (c) 2010 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gpuprogram.cpp b/device/gpu/gpuprogram.cpp index 33fbe434c..fac1a59c1 100644 --- a/device/gpu/gpuprogram.cpp +++ b/device/gpu/gpuprogram.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gpuprogram.hpp b/device/gpu/gpuprogram.hpp index f4924d656..fe7026df4 100644 --- a/device/gpu/gpuprogram.hpp +++ b/device/gpu/gpuprogram.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gpuresource.cpp b/device/gpu/gpuresource.cpp index d4f19ba94..490a21e28 100644 --- a/device/gpu/gpuresource.cpp +++ b/device/gpu/gpuresource.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gpuresource.hpp b/device/gpu/gpuresource.hpp index 6f81357b2..52a1811c7 100644 --- a/device/gpu/gpuresource.hpp +++ b/device/gpu/gpuresource.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gpusched.hpp b/device/gpu/gpusched.hpp index 2414bde93..2ebc9e875 100644 --- a/device/gpu/gpusched.hpp +++ b/device/gpu/gpusched.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2010-present Advanced Micro Devices, Inc. +/* Copyright (c) 2010 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gpuschedcl.cpp b/device/gpu/gpuschedcl.cpp index 3549225d3..55f9394d5 100644 --- a/device/gpu/gpuschedcl.cpp +++ b/device/gpu/gpuschedcl.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2010-present Advanced Micro Devices, Inc. +/* Copyright (c) 2010 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gpuscsi.cpp b/device/gpu/gpuscsi.cpp index 748260235..a07122af2 100644 --- a/device/gpu/gpuscsi.cpp +++ b/device/gpu/gpuscsi.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gpusettings.cpp b/device/gpu/gpusettings.cpp index 8955e944a..5251a576d 100644 --- a/device/gpu/gpusettings.cpp +++ b/device/gpu/gpusettings.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2010-present Advanced Micro Devices, Inc. +/* Copyright (c) 2010 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gpusettings.hpp b/device/gpu/gpusettings.hpp index c8f1e900e..b51f8aa2b 100644 --- a/device/gpu/gpusettings.hpp +++ b/device/gpu/gpusettings.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2010-present Advanced Micro Devices, Inc. +/* Copyright (c) 2010 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gputhreadtrace.cpp b/device/gpu/gputhreadtrace.cpp index 3690589a4..95faa038e 100644 --- a/device/gpu/gputhreadtrace.cpp +++ b/device/gpu/gputhreadtrace.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gputhreadtrace.hpp b/device/gpu/gputhreadtrace.hpp index dde87ad18..4b3789560 100644 --- a/device/gpu/gputhreadtrace.hpp +++ b/device/gpu/gputhreadtrace.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2009-present Advanced Micro Devices, Inc. +/* Copyright (c) 2009 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gputimestamp.cpp b/device/gpu/gputimestamp.cpp index ee0bb5b85..a21c655d5 100644 --- a/device/gpu/gputimestamp.cpp +++ b/device/gpu/gputimestamp.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gputimestamp.hpp b/device/gpu/gputimestamp.hpp index 8c37ab8d0..007c9d59c 100644 --- a/device/gpu/gputimestamp.hpp +++ b/device/gpu/gputimestamp.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gpuvirtual.cpp b/device/gpu/gpuvirtual.cpp index f84cfadaf..5073e34f0 100644 --- a/device/gpu/gpuvirtual.cpp +++ b/device/gpu/gpuvirtual.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gpuvirtual.hpp b/device/gpu/gpuvirtual.hpp index a4700f91b..29db28791 100644 --- a/device/gpu/gpuvirtual.hpp +++ b/device/gpu/gpuvirtual.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gslbe/src/rt/DxxOpenCLInteropExt.h b/device/gpu/gslbe/src/rt/DxxOpenCLInteropExt.h index d1ae91a45..abdcbd9ae 100644 --- a/device/gpu/gslbe/src/rt/DxxOpenCLInteropExt.h +++ b/device/gpu/gslbe/src/rt/DxxOpenCLInteropExt.h @@ -1,4 +1,4 @@ - /* Copyright (c) 2008-present Advanced Micro Devices, Inc. + /* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gslbe/src/rt/EventQueue.cpp b/device/gpu/gslbe/src/rt/EventQueue.cpp index d6371c861..6cbd1fa78 100644 --- a/device/gpu/gslbe/src/rt/EventQueue.cpp +++ b/device/gpu/gslbe/src/rt/EventQueue.cpp @@ -1,4 +1,4 @@ - /* Copyright (c) 2008-present Advanced Micro Devices, Inc. + /* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gslbe/src/rt/EventQueue.h b/device/gpu/gslbe/src/rt/EventQueue.h index 7b4f9055a..4d90af96a 100644 --- a/device/gpu/gslbe/src/rt/EventQueue.h +++ b/device/gpu/gslbe/src/rt/EventQueue.h @@ -1,4 +1,4 @@ - /* Copyright (c) 2008-present Advanced Micro Devices, Inc. + /* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gslbe/src/rt/GSLContext.cpp b/device/gpu/gslbe/src/rt/GSLContext.cpp index e28f87e5f..188498256 100644 --- a/device/gpu/gslbe/src/rt/GSLContext.cpp +++ b/device/gpu/gslbe/src/rt/GSLContext.cpp @@ -1,4 +1,4 @@ - /* Copyright (c) 2008-present Advanced Micro Devices, Inc. 
+ /* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gslbe/src/rt/GSLContext.h b/device/gpu/gslbe/src/rt/GSLContext.h index f7242f067..5d3d84f36 100644 --- a/device/gpu/gslbe/src/rt/GSLContext.h +++ b/device/gpu/gslbe/src/rt/GSLContext.h @@ -1,4 +1,4 @@ - /* Copyright (c) 2008-present Advanced Micro Devices, Inc. + /* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gslbe/src/rt/GSLDevice.cpp b/device/gpu/gslbe/src/rt/GSLDevice.cpp index 4b6784f89..eb24042ff 100644 --- a/device/gpu/gslbe/src/rt/GSLDevice.cpp +++ b/device/gpu/gslbe/src/rt/GSLDevice.cpp @@ -1,4 +1,4 @@ - /* Copyright (c) 2008-present Advanced Micro Devices, Inc. + /* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gslbe/src/rt/GSLDevice.h b/device/gpu/gslbe/src/rt/GSLDevice.h index 63dfb5731..d67bad8d5 100644 --- a/device/gpu/gslbe/src/rt/GSLDevice.h +++ b/device/gpu/gslbe/src/rt/GSLDevice.h @@ -1,4 +1,4 @@ - /* Copyright (c) 2008-present Advanced Micro Devices, Inc. + /* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gslbe/src/rt/GSLDeviceD3D10.cpp b/device/gpu/gslbe/src/rt/GSLDeviceD3D10.cpp index 197c5bf83..e7462f12d 100644 --- a/device/gpu/gslbe/src/rt/GSLDeviceD3D10.cpp +++ b/device/gpu/gslbe/src/rt/GSLDeviceD3D10.cpp @@ -1,4 +1,4 @@ - /* Copyright (c) 2008-present Advanced Micro Devices, Inc. + /* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gslbe/src/rt/GSLDeviceD3D11.cpp b/device/gpu/gslbe/src/rt/GSLDeviceD3D11.cpp index e2a2c875e..b2c357835 100644 --- a/device/gpu/gslbe/src/rt/GSLDeviceD3D11.cpp +++ b/device/gpu/gslbe/src/rt/GSLDeviceD3D11.cpp @@ -1,4 +1,4 @@ - /* Copyright (c) 2008-present Advanced Micro Devices, Inc. + /* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gslbe/src/rt/GSLDeviceD3D9.cpp b/device/gpu/gslbe/src/rt/GSLDeviceD3D9.cpp index b7bafbad7..dc049ecdf 100644 --- a/device/gpu/gslbe/src/rt/GSLDeviceD3D9.cpp +++ b/device/gpu/gslbe/src/rt/GSLDeviceD3D9.cpp @@ -1,4 +1,4 @@ - /* Copyright (c) 2008-present Advanced Micro Devices, Inc. + /* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gslbe/src/rt/GSLDeviceGL.cpp b/device/gpu/gslbe/src/rt/GSLDeviceGL.cpp index 42f576a46..22dcc41c9 100644 --- a/device/gpu/gslbe/src/rt/GSLDeviceGL.cpp +++ b/device/gpu/gslbe/src/rt/GSLDeviceGL.cpp @@ -1,4 +1,4 @@ - /* Copyright (c) 2008-present Advanced Micro Devices, Inc. + /* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gslbe/src/rt/GSLStubs.cpp b/device/gpu/gslbe/src/rt/GSLStubs.cpp index 205226d2e..731023d1d 100644 --- a/device/gpu/gslbe/src/rt/GSLStubs.cpp +++ b/device/gpu/gslbe/src/rt/GSLStubs.cpp @@ -1,4 +1,4 @@ - /* Copyright (c) 2008-present Advanced Micro Devices, Inc. + /* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gslbe/src/rt/backend.cpp b/device/gpu/gslbe/src/rt/backend.cpp index 531ffa017..6aea71353 100644 --- a/device/gpu/gslbe/src/rt/backend.cpp +++ b/device/gpu/gslbe/src/rt/backend.cpp @@ -1,4 +1,4 @@ - /* Copyright (c) 2008-present Advanced Micro Devices, Inc. + /* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gslbe/src/rt/backend.h b/device/gpu/gslbe/src/rt/backend.h index 90215e78e..5970aff64 100644 --- a/device/gpu/gslbe/src/rt/backend.h +++ b/device/gpu/gslbe/src/rt/backend.h @@ -1,4 +1,4 @@ - /* Copyright (c) 2008-present Advanced Micro Devices, Inc. + /* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gslbe/src/rt/caltarget.h b/device/gpu/gslbe/src/rt/caltarget.h index 3f0b3ded8..2f5b0fb6c 100644 --- a/device/gpu/gslbe/src/rt/caltarget.h +++ b/device/gpu/gslbe/src/rt/caltarget.h @@ -1,4 +1,4 @@ - /* Copyright (c) 2008-present Advanced Micro Devices, Inc. + /* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gslbe/src/rt/inifile/ini_export.cpp b/device/gpu/gslbe/src/rt/inifile/ini_export.cpp index 9e8c24fab..757ffef8c 100644 --- a/device/gpu/gslbe/src/rt/inifile/ini_export.cpp +++ b/device/gpu/gslbe/src/rt/inifile/ini_export.cpp @@ -1,4 +1,4 @@ - /* Copyright (c) 2008-present Advanced Micro Devices, Inc. + /* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gslbe/src/rt/inifile/ini_export.h b/device/gpu/gslbe/src/rt/inifile/ini_export.h index 33ecbd6f3..43f489fce 100644 --- a/device/gpu/gslbe/src/rt/inifile/ini_export.h +++ b/device/gpu/gslbe/src/rt/inifile/ini_export.h @@ -1,4 +1,4 @@ - /* Copyright (c) 2008-present Advanced Micro Devices, Inc. + /* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gslbe/src/rt/inifile/ini_values.h b/device/gpu/gslbe/src/rt/inifile/ini_values.h index 70ed3c982..289fb8a6f 100644 --- a/device/gpu/gslbe/src/rt/inifile/ini_values.h +++ b/device/gpu/gslbe/src/rt/inifile/ini_values.h @@ -1,4 +1,4 @@ - /* Copyright (c) 2008-present Advanced Micro Devices, Inc. + /* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gslbe/src/rt/inifile/inifile.cpp b/device/gpu/gslbe/src/rt/inifile/inifile.cpp index e1802ad4e..30c15dc16 100644 --- a/device/gpu/gslbe/src/rt/inifile/inifile.cpp +++ b/device/gpu/gslbe/src/rt/inifile/inifile.cpp @@ -1,4 +1,4 @@ - /* Copyright (c) 2008-present Advanced Micro Devices, Inc. + /* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gslbe/src/rt/inifile/inifile.h b/device/gpu/gslbe/src/rt/inifile/inifile.h index cf8231f4b..a44d36d9a 100644 --- a/device/gpu/gslbe/src/rt/inifile/inifile.h +++ b/device/gpu/gslbe/src/rt/inifile/inifile.h @@ -1,4 +1,4 @@ - /* Copyright (c) 2008-present Advanced Micro Devices, Inc. + /* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gslbe/src/rt/inifile/inifile_parser.cpp b/device/gpu/gslbe/src/rt/inifile/inifile_parser.cpp index ff372688e..04a8f4247 100644 --- a/device/gpu/gslbe/src/rt/inifile/inifile_parser.cpp +++ b/device/gpu/gslbe/src/rt/inifile/inifile_parser.cpp @@ -1,4 +1,4 @@ - /* Copyright (c) 2008-present Advanced Micro Devices, Inc. + /* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gslbe/src/rt/inifile/inifile_parser.h b/device/gpu/gslbe/src/rt/inifile/inifile_parser.h index 210ec86f8..d8faad0fe 100644 --- a/device/gpu/gslbe/src/rt/inifile/inifile_parser.h +++ b/device/gpu/gslbe/src/rt/inifile/inifile_parser.h @@ -1,4 +1,4 @@ - /* Copyright (c) 2008-present Advanced Micro Devices, Inc. + /* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/hsailctx.cpp b/device/hsailctx.cpp index d8cfe0fd0..fccfda35c 100644 --- a/device/hsailctx.cpp +++ b/device/hsailctx.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2021-present Advanced Micro Devices, Inc. +/* Copyright (c) 2021 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/hsailctx.hpp b/device/hsailctx.hpp index ced65c7da..1b4144838 100644 --- a/device/hsailctx.hpp +++ b/device/hsailctx.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2021-present Advanced Micro Devices, Inc. +/* Copyright (c) 2021 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/hwdebug.cpp b/device/hwdebug.cpp index b0397c368..85e38d74f 100644 --- a/device/hwdebug.cpp +++ b/device/hwdebug.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2014-present Advanced Micro Devices, Inc. +/* Copyright (c) 2014 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/hwdebug.hpp b/device/hwdebug.hpp index 08d830b13..b8bb1c5f1 100644 --- a/device/hwdebug.hpp +++ b/device/hwdebug.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2014-present Advanced Micro Devices, Inc. +/* Copyright (c) 2014 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/CMakeLists.txt b/device/pal/CMakeLists.txt deleted file mode 100644 index 121da3070..000000000 --- a/device/pal/CMakeLists.txt +++ /dev/null @@ -1,164 +0,0 @@ -# Copyright (C) 2020-2021 Advanced Micro Devices, Inc. All Rights Reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. 
-# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -# THE SOFTWARE. - -### -file(STRINGS palcdefs PAL_MAJOR_VERSION REGEX "^PAL_MAJOR_VERSION = [0-9]+") -string(REGEX REPLACE "PAL_MAJOR_VERSION = " "" PAL_MAJOR_VERSION ${PAL_MAJOR_VERSION}) - -file(STRINGS palcdefs GPUOPEN_MAJOR_VERSION REGEX "^GPUOPEN_MAJOR_VERSION = [0-9]+") -string(REGEX REPLACE "GPUOPEN_MAJOR_VERSION = " "" GPUOPEN_MAJOR_VERSION ${GPUOPEN_MAJOR_VERSION}) - -### Create PAL backend library for ROCclr -add_library(rocclrpal OBJECT "") - -# Add defines necessary for PAL build -set(PAL_CLIENT_INTERFACE_MAJOR_VERSION ${PAL_MAJOR_VERSION} - CACHE STRING "PAL major interface: ${PAL_MAJOR_VERSION}" FORCE) -set(GPUOPEN_CLIENT_INTERFACE_MAJOR_VERSION ${GPUOPEN_MAJOR_VERSION} - CACHE STRING "GPU open major interface: ${GPUOPEN_MAJOR_VERSION}" FORCE) -set(GPUOPEN_CLIENT_INTERFACE_MINOR_VERSION "0" CACHE STRING "GPU open minor interface: 0" FORCE) - -set(PAL_CLIENT "OCL" CACHE STRING "Build PAL OCL support" FORCE) - -set(PAL_BUILD_GPUOPEN ON CACHE BOOL "Build GPUOpen support") -set(PAL_BUILD_VIDEO OFF CACHE BOOL "Don't build PAL with Video support") -set(PAL_BUILD_SCPC OFF CACHE BOOL "Don't build SCPC") - -set(PAL_CLOSED_SOURCE ON CACHE BOOL "Build closed source PAL") - -set(PAL_BUILD_POLARIS22 ON CACHE BOOL "Build PAL with Polaris support") -set(PAL_BUILD_GFX9 ON CACHE BOOL "Build PAL with GFX9 support") -set(PAL_BUILD_OSS4 ON CACHE BOOL "Build PAL with OSS4 support") -set(PAL_BUILD_VEGA12 ON CACHE BOOL "Build PAL with VEGA12 support") -set(PAL_BUILD_VEGA20 ON CACHE BOOL "Build PAL with GFX9 
support") -set(PAL_BUILD_RAVEN1 ON CACHE BOOL "Build PAL with Raven1 support") -set(PAL_BUILD_RAVEN2 ON CACHE BOOL "Build PAL with Raven2 support") -set(PAL_BUILD_RENOIR ON CACHE BOOL "Build PAL with Renoir support") - -set(PAL_BUILD_GFX10 ON CACHE BOOL "Build PAL with GFX10 support") -set(PAL_BUILD_NAVI12 ON CACHE BOOL "Build PAL with Navi12 support") -set(PAL_BUILD_NAVI14 ON CACHE BOOL "Build PAL with Navi14 support") - -option(PAL_DEVELOPER_BUILD "No developer build" OFF) - -set(PYTHON_EXECUTABLE /usr/bin/python3) - -# Build PAL . . . -set(GLOBAL_ROOT_SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}) -add_subdirectory(palbe) - -# Setup defines for the client compilation -target_compile_definitions(rocclrpal PRIVATE - PAL_CLIENT_INTERFACE_MAJOR_VERSION=${PAL_CLIENT_INTERFACE_MAJOR_VERSION}) -target_compile_definitions(rocclrpal PRIVATE - GPUOPEN_CLIENT_INTERFACE_MAJOR_VERSION=${GPUOPEN_CLIENT_INTERFACE_MAJOR_VERSION}) -target_compile_definitions(rocclrpal PRIVATE - GPUOPEN_CLIENT_INTERFACE_MINOR_VERSION=${GPUOPEN_CLIENT_INTERFACE_MINOR_VERSION}) - -target_compile_definitions(rocclrpal PRIVATE PAL_CLIENT_OCL=1) -target_compile_definitions(rocclrpal PRIVATE PAL_GPUOPEN_OCL=1) -target_compile_definitions(rocclrpal PRIVATE PAL_BUILD_GPUOPEN=1) -target_compile_definitions(rocclrpal PRIVATE PAL_BUILD_SCPC=0) -target_compile_definitions(rocclrpal PRIVATE PAL_CLOSED_SOURCE=1) -target_compile_definitions(rocclrpal PRIVATE PAL_BUILD_GPUOPEN=1) - -# GFX9 targets -target_compile_definitions(rocclrpal PRIVATE PAL_BUILD_POLARIS22=1) -target_compile_definitions(rocclrpal PRIVATE PAL_BUILD_GFX9=1) -target_compile_definitions(rocclrpal PRIVATE PAL_BUILD_OSS4=1) -target_compile_definitions(rocclrpal PRIVATE PAL_BUILD_VEGA12=1) -target_compile_definitions(rocclrpal PRIVATE PAL_BUILD_VEGA20=1) -target_compile_definitions(rocclrpal PRIVATE PAL_BUILD_RAVEN1=1) -target_compile_definitions(rocclrpal PRIVATE PAL_BUILD_RAVEN2=1) -target_compile_definitions(rocclrpal PRIVATE PAL_BUILD_RENOIR=1) - -# 
GFX10 targets -target_compile_definitions(rocclrpal PRIVATE PAL_BUILD_GFX10=1) -target_compile_definitions(rocclrpal PRIVATE PAL_BUILD_NAVI12=1) -target_compile_definitions(rocclrpal PRIVATE PAL_BUILD_NAVI14=1) - -target_compile_definitions(rocclrpal PRIVATE OPENCL_MAINLINE=1) - -if(${USE_COMGR_LIBRARY} MATCHES "yes") - target_compile_definitions(rocclrpal PRIVATE USE_COMGR_LIBRARY) -endif() - -if(UNIX) - target_compile_definitions(rocclrpal PRIVATE PAL_AMDGPU_BUILD) -endif() - -target_include_directories(rocclrpal PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/compiler/sc/HSAIL/include) -target_include_directories(rocclrpal PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../asic_reg) - -target_include_directories(rocclrpal PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/device/pal/palbe) -target_include_directories(rocclrpal PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/device/pal/palbe/inc) -target_include_directories(rocclrpal PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/device/pal/palbe/inc/core) -target_include_directories(rocclrpal PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/device/pal/palbe/inc/util) -target_include_directories(rocclrpal PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/device/pal/palbe/shared/gpuopen/inc) -target_include_directories(rocclrpal PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/device/pal/palbe/shared/gpuopen/third_party/metrohash/src) -target_include_directories(rocclrpal PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) - -target_include_directories(rocclrpal - PUBLIC - $ - $ - $ - # GL and EGL headers - $ - $ - $ - PRIVATE - ${OPENCL_DIR} - ${PROJECT_SOURCE_DIR}/compiler/lib - ${PROJECT_SOURCE_DIR}/compiler/lib/include - ${PROJECT_SOURCE_DIR}/compiler/lib/backends/common - ${CMAKE_CURRENT_BINARY_DIR} - ${ROCM_OCL_INCLUDES} - ${ROCR_INCLUDES}) - - -target_sources(rocclrpal PRIVATE - palappprofile.cpp - palblit.cpp - palconstbuf.cpp - palcounters.cpp - paldebugmanager.cpp - paldevice.cpp - paldeviced3d10.cpp - paldeviced3d11.cpp - paldeviced3d9.cpp - paldevicegl.cpp - palgpuopen.cpp - palkernel.cpp - palmemory.cpp - 
palprintf.cpp - palprogram.cpp - palresource.cpp - palschedcl.cpp - palsettings.cpp - palsignal.cpp - palthreadtrace.cpp - paltimestamp.cpp - palvirtual.cpp -) - -set_target_properties(rocclrpal PROPERTIES POSITION_INDEPENDENT_CODE ON) - - diff --git a/device/pal/palappprofile.cpp b/device/pal/palappprofile.cpp index 8bd5e88cb..771cd3b16 100644 --- a/device/pal/palappprofile.cpp +++ b/device/pal/palappprofile.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/palappprofile.hpp b/device/pal/palappprofile.hpp index b10dad5c9..66fb3fe7d 100644 --- a/device/pal/palappprofile.hpp +++ b/device/pal/palappprofile.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/palblit.cpp b/device/pal/palblit.cpp index a89076e70..ad64ced84 100644 --- a/device/pal/palblit.cpp +++ b/device/pal/palblit.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/palblit.hpp b/device/pal/palblit.hpp index 571ce8020..2a0bdd272 100644 --- a/device/pal/palblit.hpp +++ b/device/pal/palblit.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/palconstbuf.cpp b/device/pal/palconstbuf.cpp index 8204efb3a..7acb7ee0a 100644 --- a/device/pal/palconstbuf.cpp +++ b/device/pal/palconstbuf.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2010-present Advanced Micro Devices, Inc. +/* Copyright (c) 2010 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/palconstbuf.hpp b/device/pal/palconstbuf.hpp index c42ddae95..7acb8674c 100644 --- a/device/pal/palconstbuf.hpp +++ b/device/pal/palconstbuf.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/palcounters.cpp b/device/pal/palcounters.cpp index b57446054..3da038efc 100644 --- a/device/pal/palcounters.cpp +++ b/device/pal/palcounters.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/palcounters.hpp b/device/pal/palcounters.hpp index d3adc3b6c..b332f7bde 100644 --- a/device/pal/palcounters.hpp +++ b/device/pal/palcounters.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/paldebugger.hpp b/device/pal/paldebugger.hpp index 29e964837..33cf9dee7 100644 --- a/device/pal/paldebugger.hpp +++ b/device/pal/paldebugger.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/paldebugmanager.cpp b/device/pal/paldebugmanager.cpp index 11419842a..2d0f137c3 100644 --- a/device/pal/paldebugmanager.cpp +++ b/device/pal/paldebugmanager.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/paldebugmanager.hpp b/device/pal/paldebugmanager.hpp index 6c800997a..1c85a045b 100644 --- a/device/pal/paldebugmanager.hpp +++ b/device/pal/paldebugmanager.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/paldefs.hpp b/device/pal/paldefs.hpp index 55a71c50a..19e15b325 100644 --- a/device/pal/paldefs.hpp +++ b/device/pal/paldefs.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/paldevice.cpp b/device/pal/paldevice.cpp index 2c2b3964f..33906557d 100644 --- a/device/pal/paldevice.cpp +++ b/device/pal/paldevice.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -93,7 +93,7 @@ static constexpr PalDevice supportedPalDevices[] = { {9, 0, 2, Pal::GfxIpLevel::GfxIp9, "gfx902", Pal::AsicRevision::Raven}, {9, 0, 4, Pal::GfxIpLevel::GfxIp9, "gfx904", Pal::AsicRevision::Vega12}, {9, 0, 6, Pal::GfxIpLevel::GfxIp9, "gfx906", Pal::AsicRevision::Vega20}, - {9, 0, 9, Pal::GfxIpLevel::GfxIp9, "gfx909", Pal::AsicRevision::Raven2}, + {9, 0, 2, Pal::GfxIpLevel::GfxIp9, "gfx902", Pal::AsicRevision::Raven2}, {9, 0, 12, Pal::GfxIpLevel::GfxIp9, "gfx90c", Pal::AsicRevision::Renoir}, {10, 1, 0, Pal::GfxIpLevel::GfxIp10_1, "gfx1010", Pal::AsicRevision::Navi10}, {10, 1, 1, Pal::GfxIpLevel::GfxIp10_1, "gfx1011", Pal::AsicRevision::Navi12}, @@ -101,6 +101,7 @@ static constexpr PalDevice supportedPalDevices[] = { {10, 3, 0, Pal::GfxIpLevel::GfxIp10_3, "gfx1030", Pal::AsicRevision::Navi21}, {10, 3, 1, Pal::GfxIpLevel::GfxIp10_3, "gfx1031", Pal::AsicRevision::Navi22}, {10, 3, 2, Pal::GfxIpLevel::GfxIp10_3, "gfx1032", Pal::AsicRevision::Navi23}, + {10, 3, 4, Pal::GfxIpLevel::GfxIp10_3, "gfx1034", Pal::AsicRevision::Navi24}, }; static std::tuple findIsa(Pal::AsicRevision asicRevision, @@ -1395,7 +1396,8 @@ pal::Memory* Device::createBuffer(amd::Memory& owner, bool directAccess) const { return nullptr; } - if (nullptr != owner.parent()->getSvmPtr()) { + if ((nullptr != owner.parent()->getSvmPtr()) && + (owner.parent()->getContext().devices().size() > 1)) { 
amd::Memory* amdParent = owner.parent(); { // Lock memory object, so only one commitment will occur diff --git a/device/pal/paldevice.hpp b/device/pal/paldevice.hpp index 052577e72..e9f1adf5d 100644 --- a/device/pal/paldevice.hpp +++ b/device/pal/paldevice.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -43,6 +43,11 @@ #include #include +#if defined(__clang__) +#if __has_feature(address_sanitizer) +#include "device/devurilocator.hpp" +#endif +#endif /*! \addtogroup PAL * @{ */ @@ -145,7 +150,13 @@ class NullDevice : public amd::Device { cl_set_device_clock_mode_output_amd* pSetClockModeOutput) { return true; } - +#if defined(__clang__) +#if __has_feature(address_sanitizer) + virtual device::UriLocator* createUriLocator() const { + return nullptr; + } +#endif +#endif protected: static Util::GenericAllocator allocator_; //!< Generic memory allocator in PAL @@ -593,7 +604,13 @@ class Device : public NullDevice { virtual bool importExtSemaphore(void** extSemaphore, const amd::Os::FileDesc& handle); virtual void DestroyExtSemaphore(void* extSemaphore); - +#if defined(__clang__) +#if __has_feature(address_sanitizer) + virtual device::UriLocator* createUrilocator() const { + return nullptr; + } +#endif +#endif private: static void PAL_STDCALL PalDeveloperCallback(void* pPrivateData, const Pal::uint32 deviceIndex, Pal::Developer::CallbackType type, void* pCbData); diff --git a/device/pal/paldeviced3d10.cpp b/device/pal/paldeviced3d10.cpp index 5d391619b..a454c27c7 100644 --- a/device/pal/paldeviced3d10.cpp +++ b/device/pal/paldeviced3d10.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/paldeviced3d11.cpp b/device/pal/paldeviced3d11.cpp index e74292c83..f9fa1e537 100644 --- a/device/pal/paldeviced3d11.cpp +++ b/device/pal/paldeviced3d11.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/paldeviced3d9.cpp b/device/pal/paldeviced3d9.cpp index cc243082b..dd96a44f5 100644 --- a/device/pal/paldeviced3d9.cpp +++ b/device/pal/paldeviced3d9.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/paldevicegl.cpp b/device/pal/paldevicegl.cpp index fe031aaa9..e8a95d0e6 100644 --- a/device/pal/paldevicegl.cpp +++ b/device/pal/paldevicegl.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/palgpuopen.cpp b/device/pal/palgpuopen.cpp index 40b4e6f1e..3862878b2 100644 --- a/device/pal/palgpuopen.cpp +++ b/device/pal/palgpuopen.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2016-present Advanced Micro Devices, Inc. +/* Copyright (c) 2016 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/palgpuopen.hpp b/device/pal/palgpuopen.hpp index 5c8d1f6e7..9705f6747 100644 --- a/device/pal/palgpuopen.hpp +++ b/device/pal/palgpuopen.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2016-present Advanced Micro Devices, Inc. +/* Copyright (c) 2016 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/palkernel.cpp b/device/pal/palkernel.cpp index bf7090a48..e4d6dbdfc 100644 --- a/device/pal/palkernel.cpp +++ b/device/pal/palkernel.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/palkernel.hpp b/device/pal/palkernel.hpp index 3c262f638..5a05bfef6 100644 --- a/device/pal/palkernel.hpp +++ b/device/pal/palkernel.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/palmemory.cpp b/device/pal/palmemory.cpp index 9774d2501..29a4fe201 100644 --- a/device/pal/palmemory.cpp +++ b/device/pal/palmemory.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -786,12 +786,16 @@ void* Memory::allocMapTarget(const amd::Coord3D& origin, const amd::Coord3D& reg mapAddress = reinterpret_cast
(owner()->getHostMem()); } // If resource is a persistent allocation, we can use it directly - else if (isPersistentDirectMap()) { + else if ((isPersistentDirectMap(mapFlags & CL_MAP_WRITE) && (getMapCount() == 0)) || + isPersistentMapped()) { if (nullptr == map(nullptr)) { LogError("Could not map target persistent resource"); decIndMapCount(); return nullptr; } + if (getMapCount() == 1) { + setPersistentMapFlag(true); + } mapAddress = data(); } // Otherwise we can use a remote resource: @@ -1046,14 +1050,17 @@ void* Image::allocMapTarget(const amd::Coord3D& origin, const amd::Coord3D& regi //! runtime can't use it directly, //! because CAL volume map doesn't work properly. //! @todo arrays can be added for persistent lock with some CAL changes - else if (isPersistentDirectMap()) { + else if((isPersistentDirectMap(mapFlags & CL_MAP_WRITE) && (getMapCount() == 0)) || + isPersistentMapped()) { if (nullptr == map(nullptr)) { useRemoteResource = true; LogError("Could not map target persistent resource, try remote resource"); } else { useRemoteResource = false; mapAddress = data(); - + if (getMapCount() == 1) { + setPersistentMapFlag(true); + } // Calculate the offset in bytes offset *= elementSize(); @@ -1075,7 +1082,6 @@ void* Image::allocMapTarget(const amd::Coord3D& origin, const amd::Coord3D& regi const static bool SysMem = true; bool failed = false; amd::Memory* memory; - // Search for a possible indirect resource memory = dev().findMapTarget(owner()->getSize()); diff --git a/device/pal/palmemory.hpp b/device/pal/palmemory.hpp index 53f169c7c..0c1cbd46a 100644 --- a/device/pal/palmemory.hpp +++ b/device/pal/palmemory.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -97,6 +97,16 @@ class Memory : public device::Memory, public Resource { size_t* slicePitch = NULL //!< Slice for the mapped memory ); + virtual bool isPersistentMapped() const { return (flags_ & PersistentMap) ? true : false; } + virtual void setPersistentMapFlag(bool persistentMapped) { + if (persistentMapped == true) { + flags_ |= PersistentMap; + } + else { + flags_ &= ~PersistentMap; + } + } + //! Pins system memory associated with this memory object virtual bool pinSystemMemory(void* hostPtr, //!< System memory address size_t size //!< Size of allocated system memory diff --git a/device/pal/palprintf.cpp b/device/pal/palprintf.cpp index acdc6dbda..61f1fead0 100644 --- a/device/pal/palprintf.cpp +++ b/device/pal/palprintf.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/palprintf.hpp b/device/pal/palprintf.hpp index bf3d6ef04..5c7b899c4 100644 --- a/device/pal/palprintf.hpp +++ b/device/pal/palprintf.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/palprogram.cpp b/device/pal/palprogram.cpp index a97b3530f..f6e319b9b 100644 --- a/device/pal/palprogram.cpp +++ b/device/pal/palprogram.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -508,6 +508,7 @@ bool HSAILProgram::createGlobalVarObj(amd::Memory** amd_mem_obj, void** device_p } /* Create a View from the global pal::Memory */ + flags = ROCCLR_MEM_INTERNAL_MEMORY; parent = codeSegGpu_->owner(); *amd_mem_obj = new (parent->getContext()) amd::Buffer(*parent, flags, offset, *bytes); diff --git a/device/pal/palprogram.hpp b/device/pal/palprogram.hpp index dd2a42990..69a8ce6fb 100644 --- a/device/pal/palprogram.hpp +++ b/device/pal/palprogram.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/palresource.cpp b/device/pal/palresource.cpp index 5148d47ec..a3de8b86e 100644 --- a/device/pal/palresource.cpp +++ b/device/pal/palresource.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -930,6 +930,9 @@ bool Resource::CreateInterop(CreateParams* params) { imgOpenInfo.swizzledFormat.swizzle = channels; imgOpenInfo.usage.shaderRead = true; imgOpenInfo.usage.shaderWrite = true; +#if defined(__unix__) + imgOpenInfo.resourceInfo.handleType = Pal::HandleType::DmaBufFd; +#endif memRef_ = GpuMemoryReference::Create(dev(), imgOpenInfo, &imgCreateInfo, &image_); if (nullptr == memRef_) { return false; @@ -1821,9 +1824,9 @@ bool Resource::isMemoryType(MemoryType memType) const { } // ================================================================================================ -bool Resource::isPersistentDirectMap() const { - bool directMap = - ((memoryType() == Resource::Persistent) && (desc().dimSize_ < 3) && !desc().imageArray_); +bool Resource::isPersistentDirectMap(bool writeMap) const { + bool directMap = ((memoryType() == Resource::Persistent) && + (desc().dimSize_ < 3) && !desc().imageArray_ && writeMap); // If direct map is possible, then validate it with the current tiling if (directMap && desc().tiled_) { @@ -1926,6 +1929,22 @@ bool MemorySubAllocator::InitAllocator(GpuMemoryReference* mem_ref) { return true; } +// ================================================================================================ +void MemorySubAllocator::forceResident(GpuMemoryReference* mem_ref) { + if (IS_WINDOWS) { + // Write one DWORD using CPDMA to force resident + GpuEvent event; + auto gpu = device_->xferQueue(); + uint32_t data = 0; + + gpu->eventBegin(MainEngine); + gpu->queue(MainEngine).addCmdMemRef(mem_ref); + gpu->iCmd()->CmdUpdateMemory(*mem_ref->iMem(), 0, 4, &data); + gpu->eventEnd(MainEngine, event); + gpu->waitForEvent(&event); + } +} + // ================================================================================================ bool MemorySubAllocator::CreateChunk(const 
Pal::IGpuMemory* reserved_va) { Pal::GpuMemoryCreateInfo createInfo = {}; @@ -1941,6 +1960,8 @@ bool MemorySubAllocator::CreateChunk(const Pal::IGpuMemory* reserved_va) { createInfo.mallPolicy = static_cast(device_->settings().mallPolicy_); GpuMemoryReference* mem_ref = GpuMemoryReference::Create(*device_, createInfo); if (mem_ref != nullptr) { + // Workaround: some chunk memory are not guaranteed to be resident during initial allocation. + forceResident(mem_ref); return InitAllocator(mem_ref); } return false; @@ -1961,6 +1982,8 @@ bool CoarseMemorySubAllocator::CreateChunk(const Pal::IGpuMemory* reserved_va) { createInfo.mallPolicy = static_cast(device_->settings().mallPolicy_); GpuMemoryReference* mem_ref = GpuMemoryReference::Create(*device_, createInfo); if (mem_ref != nullptr) { + // Workaround: some chunk memory are not guaranteed to be resident during initial allocation. + forceResident(mem_ref); return InitAllocator(mem_ref); } return false; @@ -1977,6 +2000,8 @@ bool FineMemorySubAllocator::CreateChunk(const Pal::IGpuMemory* reserved_va) { createInfo.mallPolicy = Pal::GpuMemMallPolicy::Never; GpuMemoryReference* mem_ref = GpuMemoryReference::Create(*device_, createInfo); if ((mem_ref != nullptr) && InitAllocator(mem_ref)) { + // Workaround: some chunk memory are not guaranteed to be resident during initial allocation. + forceResident(mem_ref); mem_ref->iMem()->Map(&mem_ref->cpuAddress_); return mem_ref->cpuAddress_ != nullptr; } @@ -1995,6 +2020,8 @@ bool FineUncachedMemorySubAllocator::CreateChunk(const Pal::IGpuMemory* reserved createInfo.mallPolicy = Pal::GpuMemMallPolicy::Never; GpuMemoryReference* mem_ref = GpuMemoryReference::Create(*device_, createInfo); if ((mem_ref != nullptr) && InitAllocator(mem_ref)) { + // Workaround: some chunk memory are not guaranteed to be resident during initial allocation. 
+ forceResident(mem_ref); mem_ref->iMem()->Map(&mem_ref->cpuAddress_); return mem_ref->cpuAddress_ != nullptr; } diff --git a/device/pal/palresource.hpp b/device/pal/palresource.hpp index ec11a1a24..c7108368b 100644 --- a/device/pal/palresource.hpp +++ b/device/pal/palresource.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -296,7 +296,8 @@ class Resource : public amd::HeapObject { bool mipMapped() const { return (desc().mipLevels_ > 1) ? true : false; } //! Checks if persistent memory can have a direct map - bool isPersistentDirectMap() const; + bool isPersistentDirectMap(bool writeMap = true) const; + int getMapCount() const { return mapCount_; } /*! \brief Locks the resource and returns a physical pointer * @@ -528,6 +529,7 @@ class MemorySubAllocator : public amd::HeapObject { //! Allocate new chunk of memory virtual bool CreateChunk(const Pal::IGpuMemory* reserved_va); bool InitAllocator(GpuMemoryReference* mem_ref); + void forceResident(GpuMemoryReference* mem_ref); Device* device_; std::unordered_map heaps_; diff --git a/device/pal/palsched.hpp b/device/pal/palsched.hpp index 4f14f41a5..abcab0ded 100644 --- a/device/pal/palsched.hpp +++ b/device/pal/palsched.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/palschedcl.cpp b/device/pal/palschedcl.cpp index 672144d8e..344ee02d8 100644 --- a/device/pal/palschedcl.cpp +++ b/device/pal/palschedcl.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. 
+/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/palsettings.cpp b/device/pal/palsettings.cpp index 5bb658529..01997fd30 100644 --- a/device/pal/palsettings.cpp +++ b/device/pal/palsettings.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -202,6 +202,7 @@ bool Settings::create(const Pal::DeviceProperties& palProp, case Pal::AsicRevision::Navi12: case Pal::AsicRevision::Navi10: case Pal::AsicRevision::Navi10_A0: + case Pal::AsicRevision::Navi24: case Pal::AsicRevision::Navi23: case Pal::AsicRevision::Navi22: case Pal::AsicRevision::Navi21: diff --git a/device/pal/palsettings.hpp b/device/pal/palsettings.hpp index 4c871d57f..c682dc3d3 100644 --- a/device/pal/palsettings.hpp +++ b/device/pal/palsettings.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/palsignal.cpp b/device/pal/palsignal.cpp index 53a9cefc8..076464741 100644 --- a/device/pal/palsignal.cpp +++ b/device/pal/palsignal.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2021-present Advanced Micro Devices, Inc. +/* Copyright (c) 2021 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -29,6 +29,21 @@ namespace pal { Signal::~Signal() { dev_->context().svmFree(amdSignal_); + + if (ws_ == device::Signal::WaitState::Blocked) { +#if defined(_WIN32) + Pal::Result result = Pal::Result::Success; + + Pal::UnregisterEventInfo eventInfo = {}; + eventInfo.pEvent = &event_; + eventInfo.trackingType = Pal::EventTrackingType::ShaderInterrupt; + result = dev_->iDev()->UnregisterEvent(eventInfo); + if (result != Pal::Result::Success) { + ClPrint(amd::LOG_ERROR, amd::LOG_QUEUE, + "Failed to unregister SQ event needed for hostcall buffer"); + } +#endif + } } bool Signal::Init(const amd::Device& dev, uint64_t init, device::Signal::WaitState ws) { @@ -47,6 +62,47 @@ bool Signal::Init(const amd::Device& dev, uint64_t init, device::Signal::WaitSta amdSignal_ = new (buffer) amd_signal_t(); amdSignal_->value = init; + if (ws_ == device::Signal::WaitState::Blocked) { +#if defined(_WIN32) + Pal::Result result = Pal::Result::Success; + + Util::EventCreateFlags flags = {}; + flags.manualReset = false; + flags.initiallySignaled = false; + result = event_.Init(flags); + if (result != Pal::Result::Success) { + ClPrint(amd::LOG_ERROR, amd::LOG_QUEUE, + "Failed to create Pal::Util::Event needed for hostcall buffer"); + return false; + } + + result = event_.Set(); + if (result != Pal::Result::Success) { + ClPrint(amd::LOG_ERROR, amd::LOG_QUEUE, + "Failed to set Pal::Util::Event needed for hostcall buffer"); + return false; + } + + Pal::RegisterEventInfo eventInputInfo = {}; + eventInputInfo.pEvent = &event_; + eventInputInfo.trackingType = Pal::EventTrackingType::ShaderInterrupt; + Pal::RegisterEventOutputInfo eventOutputInfo = {}; + result = dev_->iDev()->RegisterEvent( + eventInputInfo, + &eventOutputInfo); + if (result != Pal::Result::Success) { + ClPrint(amd::LOG_ERROR, amd::LOG_QUEUE, + "Failed to register SQ event 
needed for hostcall buffer"); + return false; + } + amdSignal_->event_id = eventOutputInfo.shaderInterrupt.eventId; + amdSignal_->event_mailbox_ptr = eventOutputInfo.shaderInterrupt.eventMailboxGpuVa; + ClPrint(amd::LOG_INFO, amd::LOG_INIT, + "Registered SQ event %d with mailbox slot %p", + amdSignal_->event_id, amdSignal_->event_mailbox_ptr); +#endif + } + return true; } @@ -67,7 +123,19 @@ uint64_t Signal::Wait(uint64_t value, device::Signal::Condition c, uint64_t time } (c); if (ws_ == device::Signal::WaitState::Blocked) { - guarantee(false, "Unimplemented"); +#if defined(_WIN32) + Pal::Result result = Pal::Result::Success; + + float timeoutInSec = timeout / (1000.0f * 1000.0f); /* divide in floating point: integer division truncated any timeout below 1000000 to zero */ + result = event_.Wait(timeoutInSec); + + if (result != Pal::Result::Success) { + return -1; + } + + std::atomic_thread_fence(std::memory_order_acquire); + return amdSignal_->value; +#endif } else if (ws_ == device::Signal::WaitState::Active) { auto start = amd::Os::timeNanos(); while (true) { diff --git a/device/pal/palsignal.hpp b/device/pal/palsignal.hpp index b02a43b1f..781cbd6b1 100644 --- a/device/pal/palsignal.hpp +++ b/device/pal/palsignal.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2021-present Advanced Micro Devices, Inc. +/* Copyright (c) 2021 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -24,6 +24,8 @@ #include +#include "palEvent.h" + namespace pal { class Device; @@ -32,6 +34,7 @@ class Signal: public device::Signal { private: const Device* dev_; amd_signal_t* amdSignal_; + Util::Event event_; public: ~Signal() override; diff --git a/device/pal/palthreadtrace.cpp b/device/pal/palthreadtrace.cpp index d4cda4781..aee42e1a0 100644 --- a/device/pal/palthreadtrace.cpp +++ b/device/pal/palthreadtrace.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/palthreadtrace.hpp b/device/pal/palthreadtrace.hpp index 27680b974..e0b71c8f6 100644 --- a/device/pal/palthreadtrace.hpp +++ b/device/pal/palthreadtrace.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/paltimestamp.cpp b/device/pal/paltimestamp.cpp index e9726afcc..37af64b8c 100644 --- a/device/pal/paltimestamp.cpp +++ b/device/pal/paltimestamp.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/paltimestamp.hpp b/device/pal/paltimestamp.hpp index add6bd5f8..183051389 100644 --- a/device/pal/paltimestamp.hpp +++ b/device/pal/paltimestamp.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/paltrap.hpp b/device/pal/paltrap.hpp index e62cfb59b..783ec5fe4 100644 --- a/device/pal/paltrap.hpp +++ b/device/pal/paltrap.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/palvirtual.cpp b/device/pal/palvirtual.cpp index 4ddd805f8..898367b71 100644 --- a/device/pal/palvirtual.cpp +++ b/device/pal/palvirtual.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -1620,7 +1620,7 @@ void VirtualGPU::submitMapMemory(amd::MapMemoryCommand& vcmd) { // Add memory to VA cache, so rutnime can detect direct access to VA dev().addVACache(memory); - } else if (memory->isPersistentDirectMap()) { + } else if (memory->isPersistentMapped()) { // Nothing to do here } else if (memory->mapMemory() != nullptr) { // Target is a remote resource, so copy @@ -1721,10 +1721,13 @@ void VirtualGPU::submitUnmapMemory(amd::UnmapMemoryCommand& vcmd) { } // data check was added for persistent memory that failed to get aperture // and therefore are treated like a remote resource - else if (memory->isPersistentDirectMap() && (memory->data() != nullptr)) { + else if (memory->isPersistentMapped()) { // Map/unmap must be serialized amd::ScopedLock lock(owner->lockMemoryOps()); memory->unmap(this); + if (memory->getMapCount() == 0) { + memory->setPersistentMapFlag(false); + } } else if (memory->mapMemory() != nullptr) { if (writeMapInfo->isUnmapWrite()) { amd::Coord3D srcOrigin(0, 0, 0); diff --git a/device/pal/palvirtual.hpp b/device/pal/palvirtual.hpp index 82098b1ad..87538a2d4 100644 --- a/device/pal/palvirtual.hpp +++ b/device/pal/palvirtual.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/rocm/CMakeLists.txt b/device/rocm/CMakeLists.txt deleted file mode 100644 index 7d6a69904..000000000 --- a/device/rocm/CMakeLists.txt +++ /dev/null @@ -1,72 +0,0 @@ -# Copyright (C) 2017-2021 Advanced Micro Devices, Inc. All Rights Reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -# THE SOFTWARE. 
- -add_library(oclrocm OBJECT - roccounters.cpp - rocprintf.cpp - rocprogram.cpp - rocmemory.cpp - rocdevice.cpp - rocblit.cpp - rockernel.cpp - rocvirtual.cpp - rocglinterop.cpp - rocappprofile.cpp - rocsettings.cpp - rocschedcl.cpp - rocsignal.cpp -) - -target_include_directories(oclrocm - PUBLIC - $ - $ - $ - # GL and EGL headers - $ - $ - $ - PRIVATE - ${OPENCL_DIR} - ${PROJECT_SOURCE_DIR}/compiler/lib - ${PROJECT_SOURCE_DIR}/compiler/lib/backends/common - ${PROJECT_SOURCE_DIR}/elf - ${CMAKE_CURRENT_BINARY_DIR} - ${ROCM_OCL_INCLUDES} - $) - - -if(USE_COMGR_LIBRARY) - if(${BUILD_SHARED_LIBS}) - target_compile_definitions(oclrocm PRIVATE USE_COMGR_LIBRARY COMGR_DYN_DLL) - else() - target_compile_definitions(oclrocm PRIVATE USE_COMGR_LIBRARY) - endif() -endif() - -if (UNIX) - find_library(LIBNUMA numa) - if (LIBNUMA) - target_compile_definitions(oclrocm PRIVATE ROCCLR_SUPPORT_NUMA_POLICY) - message(STATUS "Found: ${LIBNUMA}") - endif() -endif() - -set_target_properties(oclrocm PROPERTIES POSITION_INDEPENDENT_CODE ON) diff --git a/device/rocm/mesa_glinterop.h b/device/rocm/mesa_glinterop.h index bf3003d01..cf6669e48 100644 --- a/device/rocm/mesa_glinterop.h +++ b/device/rocm/mesa_glinterop.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2016-present Advanced Micro Devices, Inc. +/* Copyright (c) 2016 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/rocm/pro/lnxheaders.h b/device/rocm/pro/lnxheaders.h index afa4e894e..8c9fbe141 100644 --- a/device/rocm/pro/lnxheaders.h +++ b/device/rocm/pro/lnxheaders.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2017-present Advanced Micro Devices, Inc. +/* Copyright (c) 2017 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/rocm/pro/prodevice.cpp b/device/rocm/pro/prodevice.cpp index ad963100e..465735c8c 100644 --- a/device/rocm/pro/prodevice.cpp +++ b/device/rocm/pro/prodevice.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2017-present Advanced Micro Devices, Inc. +/* Copyright (c) 2017 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/rocm/pro/prodevice.hpp b/device/rocm/pro/prodevice.hpp index 34cc83938..80ff3600c 100644 --- a/device/rocm/pro/prodevice.hpp +++ b/device/rocm/pro/prodevice.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2017-present Advanced Micro Devices, Inc. +/* Copyright (c) 2017 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/rocm/pro/prodriver.hpp b/device/rocm/pro/prodriver.hpp index 1467e88f2..bc4206974 100644 --- a/device/rocm/pro/prodriver.hpp +++ b/device/rocm/pro/prodriver.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2017-present Advanced Micro Devices, Inc. +/* Copyright (c) 2017 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/rocm/pro/profuncs.hpp b/device/rocm/pro/profuncs.hpp index 864b167f6..e878df0c9 100644 --- a/device/rocm/pro/profuncs.hpp +++ b/device/rocm/pro/profuncs.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2017-present Advanced Micro Devices, Inc. +/* Copyright (c) 2017 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/rocm/rocappprofile.cpp b/device/rocm/rocappprofile.cpp index 26579aa29..0ac1b42b9 100644 --- a/device/rocm/rocappprofile.cpp +++ b/device/rocm/rocappprofile.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2009-present Advanced Micro Devices, Inc. +/* Copyright (c) 2009 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/rocm/rocappprofile.hpp b/device/rocm/rocappprofile.hpp index ab62fa9f2..67643f30e 100644 --- a/device/rocm/rocappprofile.hpp +++ b/device/rocm/rocappprofile.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2009-present Advanced Micro Devices, Inc. +/* Copyright (c) 2009 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/rocm/rocblit.cpp b/device/rocm/rocblit.cpp index 2f2f54f92..c89da0cf3 100644 --- a/device/rocm/rocblit.cpp +++ b/device/rocm/rocblit.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/rocm/rocblit.hpp b/device/rocm/rocblit.hpp index 24959350e..5c14bee2c 100644 --- a/device/rocm/rocblit.hpp +++ b/device/rocm/rocblit.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/rocm/roccounters.cpp b/device/rocm/roccounters.cpp index 0cac41dda..6a976d547 100644 --- a/device/rocm/roccounters.cpp +++ b/device/rocm/roccounters.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2017-present Advanced Micro Devices, Inc. +/* Copyright (c) 2017 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/rocm/roccounters.hpp b/device/rocm/roccounters.hpp index 5719cde66..ffc9fad09 100644 --- a/device/rocm/roccounters.hpp +++ b/device/rocm/roccounters.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2017-present Advanced Micro Devices, Inc. +/* Copyright (c) 2017 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/rocm/rocdefs.hpp b/device/rocm/rocdefs.hpp index 8e9712fb6..b93156a52 100644 --- a/device/rocm/rocdefs.hpp +++ b/device/rocm/rocdefs.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/rocm/rocdevice.cpp b/device/rocm/rocdevice.cpp index 6fe6dd203..c72e14c28 100644 --- a/device/rocm/rocdevice.cpp +++ b/device/rocm/rocdevice.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -45,6 +45,12 @@ #endif #include "platform/sampler.hpp" +#if defined(__clang__) +#if __has_feature(address_sanitizer) +#include "device/rocm/rocurilocator.hpp" +#endif +#endif + #include #include #include @@ -1075,6 +1081,7 @@ Memory* Device::getGpuMemory(amd::Memory* mem) const { return static_cast(mem->getDeviceMemory(*this)); } +// ================================================================================================ bool Device::populateOCLDeviceConstants() { info_.available_ = true; @@ -1523,7 +1530,9 @@ bool Device::populateOCLDeviceConstants() { info_.cooperativeMultiDeviceGroups_ = settings().enableCoopMultiDeviceGroups_; // TODO: Update this to use HSA API when it is ready. For now limiting this to gfx9 - info_.aqlBarrierValue_ = (isa().versionMajor() == 9 && isa().versionMinor() == 0); + info_.aqlBarrierValue_ = (isa().versionMajor() == 9 && isa().versionMinor() == 0 && + (isa().versionStepping() == 0 || isa().versionStepping() == 4 || + isa().versionStepping() == 8 || isa().versionStepping() == 10)); } info_.maxPipePacketSize_ = info_.maxMemAllocSize_; @@ -1564,14 +1573,23 @@ bool Device::populateOCLDeviceConstants() { &info_.hmmCpuMemoryAccessible_)) { LogError("HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT query failed."); } - LogPrintfInfo("HMM support: %d, xnack: %d\n", - info_.hmmSupported_, info_.hmmCpuMemoryAccessible_); + + // HMM specific capability for CPU direct access to device memory + if (HSA_STATUS_SUCCESS != hsa_agent_get_info(_bkendDevice, + static_cast(HSA_AMD_AGENT_INFO_SVM_DIRECT_HOST_ACCESS), + &info_.hmmDirectHostAccess_)) { + LogError("HSA_AMD_AGENT_INFO_SVM_DIRECT_HOST_ACCESS query failed."); + } + + LogPrintfInfo("HMM support: %d, xnack: %d, direct host access: %d\n", + info_.hmmSupported_, info_.hmmCpuMemoryAccessible_, info_.hmmDirectHostAccess_); 
info_.globalCUMask_ = {}; return true; } +// ================================================================================================ device::VirtualDevice* Device::createVirtualDevice(amd::CommandQueue* queue) { amd::ScopedLock lock(vgpusAccess()); @@ -2231,10 +2249,10 @@ bool Device::SetSvmAttributesInt(const void* dev_ptr, size_t count, switch (advice) { case amd::MemoryAdvice::SetReadMostly: - attr.push_back({HSA_AMD_SVM_ATTRIB_READ_ONLY, true}); + attr.push_back({HSA_AMD_SVM_ATTRIB_READ_MOSTLY, true}); break; case amd::MemoryAdvice::UnsetReadMostly: - attr.push_back({HSA_AMD_SVM_ATTRIB_READ_ONLY, false}); + attr.push_back({HSA_AMD_SVM_ATTRIB_READ_MOSTLY, false}); break; case amd::MemoryAdvice::SetPreferredLocation: if (use_cpu) { @@ -2258,7 +2276,10 @@ bool Device::SetSvmAttributesInt(const void* dev_ptr, size_t count, //! @note: HMM should support automatic page table update with xnack enabled, //! but currently it doesn't and runtime explicitly enables access from all devices for (const auto dev : devices()) { - attr.push_back({attrib, static_cast(dev)->getBackendDevice().handle}); + // Skip null devices + if (static_cast(dev)->getBackendDevice().handle != 0) { + attr.push_back({attrib, static_cast(dev)->getBackendDevice().handle}); + } } } else { attr.push_back({attrib, getBackendDevice().handle}); @@ -2270,6 +2291,12 @@ bool Device::SetSvmAttributesInt(const void* dev_ptr, size_t count, // @note: 0 may cause a failure on old runtimes attr.push_back({HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE_IN_PLACE, 0}); break; + case amd::MemoryAdvice::SetCoarseGrain: + attr.push_back({HSA_AMD_SVM_ATTRIB_GLOBAL_FLAG, HSA_AMD_SVM_GLOBAL_FLAG_COARSE_GRAINED}); + break; + case amd::MemoryAdvice::UnsetCoarseGrain: + attr.push_back({HSA_AMD_SVM_ATTRIB_GLOBAL_FLAG, HSA_AMD_SVM_GLOBAL_FLAG_FINE_GRAINED}); + break; default: return false; break; @@ -2278,7 +2305,7 @@ bool Device::SetSvmAttributesInt(const void* dev_ptr, size_t count, hsa_status_t status = 
hsa_amd_svm_attributes_set(const_cast(dev_ptr), count, attr.data(), attr.size()); if (status != HSA_STATUS_SUCCESS) { - LogPrintfError("hsa_amd_svm_attributes_set() failed. Advice: %d", advice); + LogPrintfError("hsa_amd_svm_attributes_set() failed. Advice: %d, status: %d", advice, status); return false; } } else { @@ -2511,6 +2538,22 @@ bool Device::SetClockMode(const cl_set_device_clock_mode_input_amd setClockModeI return result; } +// ================================================================================================ +bool Device::IsHwEventReady(const amd::Event& event, bool wait) const { + void* hw_event = (event.NotifyEvent() != nullptr) ? + event.NotifyEvent()->HwEvent() : event.HwEvent(); + if (hw_event == nullptr) { + ClPrint(amd::LOG_INFO, amd::LOG_SIG, "No HW event"); + return false; + } else if (wait) { + auto* vdev = event.command().queue()->vdev(); + WaitForSignal(reinterpret_cast(hw_event)->signal_, vdev->ActiveWait()); + return true; + } + return (hsa_signal_load_relaxed(reinterpret_cast(hw_event)->signal_) <= 0); +} + +// ================================================================================================ static void callbackQueue(hsa_status_t status, hsa_queue_t* queue, void* data) { if (status != HSA_STATUS_SUCCESS && status != HSA_STATUS_INFO_BREAK) { // Abort on device exceptions. 
@@ -2522,6 +2565,7 @@ static void callbackQueue(hsa_status_t status, hsa_queue_t* queue, void* data) { } } +// ================================================================================================ hsa_queue_t* Device::getQueueFromPool(const uint qIndex) { if (qIndex < QueuePriority::Total && queuePool_[qIndex].size() > 0) { typedef decltype(queuePool_)::value_type::const_reference PoolRef; @@ -2895,6 +2939,7 @@ bool Device::findLinkInfo(const hsa_amd_memory_pool_t& pool, return true; } +// ================================================================================================ void Device::getGlobalCUMask(std::string cuMaskStr) { if (cuMaskStr.length() != 0) { std::string pre = cuMaskStr.substr(0, 2); @@ -2947,10 +2992,12 @@ void Device::getGlobalCUMask(std::string cuMaskStr) { } } +// ================================================================================================ device::Signal* Device::createSignal() const { return new roc::Signal(); } +// ================================================================================================ amd::Memory* Device::GetArenaMemObj(const void* ptr, size_t& offset) { // If arena_mem_obj_ is null, then HMM and Xnack is disabled. Return nullptr. 
if (arena_mem_obj_ == nullptr) { @@ -2965,5 +3012,32 @@ amd::Memory* Device::GetArenaMemObj(const void* ptr, size_t& offset) { return arena_mem_obj_; } +// ================================================================================================ +void Device::ReleaseGlobalSignal(void* signal) const { + if (signal != nullptr) { + reinterpret_cast(signal)->release(); + } +} + +// ================================================================================================ +ProfilingSignal::~ProfilingSignal() { + if (signal_.handle != 0) { + if (hsa_signal_load_relaxed(signal_) > 0) { + LogError("Runtime shouldn't destroy a signal that is still busy!"); + if (hsa_signal_wait_scacquire(signal_, HSA_SIGNAL_CONDITION_LT, kInitSignalValueOne, + kUnlimitedWait, HSA_WAIT_STATE_BLOCKED) != 0) { + } + } + hsa_signal_destroy(signal_); + } +} + +#if defined(__clang__) +#if __has_feature(address_sanitizer) +device::UriLocator* Device::createUriLocator() const { + return new roc::UriLocator(); +} +#endif +#endif } // namespace roc #endif // WITHOUT_HSA_BACKEND diff --git a/device/rocm/rocdevice.hpp b/device/rocm/rocdevice.hpp index b3d009c52..e8f479104 100644 --- a/device/rocm/rocdevice.hpp +++ b/device/rocm/rocdevice.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2009-present Advanced Micro Devices, Inc. +/* Copyright (c) 2009 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -77,6 +77,24 @@ class VirtualDevice; class PrintfDbg; class IProDevice; +class ProfilingSignal : public amd::ReferenceCountedObject { +public: + hsa_signal_t signal_; //!< HSA signal to track profiling information + Timestamp* ts_; //!< Timestamp object associated with the signal + HwQueueEngine engine_; //!< Engine used with this signal + bool done_; //!< True if signal is done + amd::Monitor lock_; //!< Signal lock for update + ProfilingSignal() + : ts_(nullptr) + , engine_(HwQueueEngine::Compute) + , done_(true) + , lock_("Signal Ops Lock", true) + { signal_.handle = 0; } + + virtual ~ProfilingSignal(); + amd::Monitor& LockSignalOps() { return lock_; } +}; + class Sampler : public device::Sampler { public: //! Constructor @@ -232,7 +250,21 @@ class NullDevice : public amd::Device { return true; } - virtual bool SetClockMode(const cl_set_device_clock_mode_input_amd setClockModeInput, cl_set_device_clock_mode_output_amd* pSetClockModeOutput) { return true; } + virtual bool SetClockMode( + const cl_set_device_clock_mode_input_amd setClockModeInput, + cl_set_device_clock_mode_output_amd* pSetClockModeOutput) { return true; } + + virtual bool IsHwEventReady(const amd::Event& event, bool wait = false) const { return false; } + virtual void ReleaseGlobalSignal(void* signal) const {} + +#if defined(__clang__) +#if __has_feature(address_sanitizer) + virtual device::UriLocator* createUriLocator() const { + ShouldNotReachHere(); + return nullptr; + } +#endif +#endif protected: //! 
Initialize compiler instance and handle @@ -400,6 +432,9 @@ class Device : public NullDevice { virtual bool SetClockMode(const cl_set_device_clock_mode_input_amd setClockModeInput, cl_set_device_clock_mode_output_amd* pSetClockModeOutput); + virtual bool IsHwEventReady(const amd::Event& event, bool wait = false) const; + virtual void ReleaseGlobalSignal(void* signal) const; + //! Allocate host memory in terms of numa policy set by user void* hostNumaAlloc(size_t size, size_t alignment, bool atomics = false) const; @@ -574,6 +609,11 @@ class Device : public NullDevice { //! enum for keeping the total and available queue priorities enum QueuePriority : uint { Low = 0, Normal = 1, High = 2, Total = 3}; +#if defined(__clang__) +#if __has_feature(address_sanitizer) + virtual device::UriLocator* createUriLocator() const; +#endif +#endif }; // class roc::Device } // namespace roc diff --git a/device/rocm/rocglinterop.cpp b/device/rocm/rocglinterop.cpp index 9d81e4495..02754c259 100644 --- a/device/rocm/rocglinterop.cpp +++ b/device/rocm/rocglinterop.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2016-present Advanced Micro Devices, Inc. +/* Copyright (c) 2016 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/rocm/rocglinterop.hpp b/device/rocm/rocglinterop.hpp index 88102d0e5..a539cc6fc 100644 --- a/device/rocm/rocglinterop.hpp +++ b/device/rocm/rocglinterop.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2016-present Advanced Micro Devices, Inc. +/* Copyright (c) 2016 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/rocm/rockernel.cpp b/device/rocm/rockernel.cpp index e9caf329d..d46c9adef 100644 --- a/device/rocm/rockernel.cpp +++ b/device/rocm/rockernel.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2009-present Advanced Micro Devices, Inc. +/* Copyright (c) 2009 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/rocm/rockernel.hpp b/device/rocm/rockernel.hpp index c6cedd83d..9f52e059a 100644 --- a/device/rocm/rockernel.hpp +++ b/device/rocm/rockernel.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2009-present Advanced Micro Devices, Inc. +/* Copyright (c) 2009 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/rocm/rocmemory.cpp b/device/rocm/rocmemory.cpp index 998066118..2182111c8 100644 --- a/device/rocm/rocmemory.cpp +++ b/device/rocm/rocmemory.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -269,13 +269,12 @@ bool Memory::createInteropBuffer(GLenum targetType, int miplevel) { if (status != HSA_STATUS_SUCCESS) return false; - //! 
@todo Need to handle metadata correctly -#if 0 - // if map_buffer wrote anything in metadata, copy it to amdImageDesc_ - if (metadata_size != 0) { + // if map_buffer wrote a legitimate SRD, copy it to amdImageDesc_ + if ((metadata_size != 0) && + (reinterpret_cast(metadata)->deviceID == + amdImageDesc_->deviceID)) { memcpy(amdImageDesc_, metadata, metadata_size); } -#endif //0 kind_ = MEMORY_KIND_INTEROP; assert(deviceMemory_ != nullptr && "Interop map failed to produce a pointer!"); @@ -1006,28 +1005,28 @@ void Image::populateImageDescriptor() { switch (image->getType()) { case CL_MEM_OBJECT_IMAGE1D: imageDescriptor_.geometry = HSA_EXT_IMAGE_GEOMETRY_1D; - imageDescriptor_.height = 1; - imageDescriptor_.depth = 1; + imageDescriptor_.height = 0; + imageDescriptor_.depth = 0; break; case CL_MEM_OBJECT_IMAGE1D_BUFFER: imageDescriptor_.geometry = HSA_EXT_IMAGE_GEOMETRY_1DB; - imageDescriptor_.height = 1; - imageDescriptor_.depth = 1; + imageDescriptor_.height = 0; + imageDescriptor_.depth = 0; break; case CL_MEM_OBJECT_IMAGE1D_ARRAY: //@todo - arraySize = height ?! imageDescriptor_.geometry = HSA_EXT_IMAGE_GEOMETRY_1DA; - imageDescriptor_.height = 1; + imageDescriptor_.height = 0; imageDescriptor_.array_size = image->getHeight(); break; case CL_MEM_OBJECT_IMAGE2D: imageDescriptor_.geometry = HSA_EXT_IMAGE_GEOMETRY_2D; - imageDescriptor_.depth = 1; + imageDescriptor_.depth = 0; break; case CL_MEM_OBJECT_IMAGE2D_ARRAY: //@todo - arraySize = depth ?! imageDescriptor_.geometry = HSA_EXT_IMAGE_GEOMETRY_2DA; - imageDescriptor_.depth = 1; + imageDescriptor_.depth = 0; imageDescriptor_.array_size = image->getDepth(); break; case CL_MEM_OBJECT_IMAGE3D: diff --git a/device/rocm/rocmemory.hpp b/device/rocm/rocmemory.hpp index 6f9468e41..4eaa57619 100644 --- a/device/rocm/rocmemory.hpp +++ b/device/rocm/rocmemory.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2016-present Advanced Micro Devices, Inc. +/* Copyright (c) 2016 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/rocm/rocprintf.cpp b/device/rocm/rocprintf.cpp index 83088a753..a7740e17a 100644 --- a/device/rocm/rocprintf.cpp +++ b/device/rocm/rocprintf.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2010-present Advanced Micro Devices, Inc. +/* Copyright (c) 2010 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/rocm/rocprintf.hpp b/device/rocm/rocprintf.hpp index 7a87aa86d..2945ee835 100644 --- a/device/rocm/rocprintf.hpp +++ b/device/rocm/rocprintf.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2010-present Advanced Micro Devices, Inc. +/* Copyright (c) 2010 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/rocm/rocprogram.cpp b/device/rocm/rocprogram.cpp index f791e449b..32aed3d55 100644 --- a/device/rocm/rocprogram.cpp +++ b/device/rocm/rocprogram.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -190,8 +190,9 @@ bool Program::createGlobalVarObj(amd::Memory** amd_mem_obj, void** device_pptr, } roc_device = &(rocDevice()); - *amd_mem_obj = new(roc_device->context()) amd::Buffer(roc_device->context(), 0, *bytes, - *device_pptr); + *amd_mem_obj = new(roc_device->context()) amd::Buffer(roc_device->context(), + ROCCLR_MEM_INTERNAL_MEMORY, + *bytes, *device_pptr); if (*amd_mem_obj == nullptr) { buildLog_ += "[OCL] Failed to create a mem object!"; diff --git a/device/rocm/rocprogram.hpp b/device/rocm/rocprogram.hpp index 0776df689..4c0b7fbdc 100644 --- a/device/rocm/rocprogram.hpp +++ b/device/rocm/rocprogram.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/rocm/rocregisters.hpp b/device/rocm/rocregisters.hpp index 721f06eee..fa05ff98e 100644 --- a/device/rocm/rocregisters.hpp +++ b/device/rocm/rocregisters.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2016-present Advanced Micro Devices, Inc. +/* Copyright (c) 2016 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -18,13 +18,6 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/* -Definitions taken from Mesa radeonsi and GCN3 isa manual. -https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/radeonsi/sid.h -http://gpuopen.com/compute-product/amd-gcn3-isa-architecture-manual/ - -WORD7 is defined in mesa but has no fields and isn't in GCN3 doc. Can I use this? 
-*/ #pragma once #ifndef WITHOUT_HSA_BACKEND diff --git a/device/rocm/rocsched.hpp b/device/rocm/rocsched.hpp index da7b2c841..bf431bd1a 100644 --- a/device/rocm/rocsched.hpp +++ b/device/rocm/rocsched.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2018-present Advanced Micro Devices, Inc. +/* Copyright (c) 2018 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/rocm/rocschedcl.cpp b/device/rocm/rocschedcl.cpp index f06d30055..051c28a51 100644 --- a/device/rocm/rocschedcl.cpp +++ b/device/rocm/rocschedcl.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2018-present Advanced Micro Devices, Inc. +/* Copyright (c) 2018 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/rocm/rocsettings.cpp b/device/rocm/rocsettings.cpp index 62cbc3579..17c940e3f 100644 --- a/device/rocm/rocsettings.cpp +++ b/device/rocm/rocsettings.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2010-present Advanced Micro Devices, Inc. +/* Copyright (c) 2010 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -129,11 +129,9 @@ bool Settings::create(bool fullProfile, uint32_t gfxipMajor, uint32_t gfxipMinor enableExtension(ClAmdMediaOps2); enableExtension(ClKhrImage2dFromBuffer); -#ifdef ROCCLR_ENABLE_GL_SHARING if (MesaInterop::Supported()) { enableExtension(ClKhrGlSharing); } -#endif // Enable platform extension enableExtension(ClAmdDeviceAttributeQuery); diff --git a/device/rocm/rocsettings.hpp b/device/rocm/rocsettings.hpp index 34baa1ec4..a7107ce86 100644 --- a/device/rocm/rocsettings.hpp +++ b/device/rocm/rocsettings.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2010-present Advanced Micro Devices, Inc. +/* Copyright (c) 2010 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/rocm/rocsignal.cpp b/device/rocm/rocsignal.cpp index 013f4f3e5..d6a7b68eb 100644 --- a/device/rocm/rocsignal.cpp +++ b/device/rocm/rocsignal.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2021-present Advanced Micro Devices, Inc. +/* Copyright (c) 2021 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/rocm/rocsignal.hpp b/device/rocm/rocsignal.hpp index 2350c14e2..3e8c1e6e6 100644 --- a/device/rocm/rocsignal.hpp +++ b/device/rocm/rocsignal.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2021-present Advanced Micro Devices, Inc. +/* Copyright (c) 2021 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/rocm/rocurilocator.cpp b/device/rocm/rocurilocator.cpp new file mode 100644 index 000000000..157f07d99 --- /dev/null +++ b/device/rocm/rocurilocator.cpp @@ -0,0 +1,183 @@ +/* Copyright (c) 2021 - 2021 Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
*/ + +#if defined(__clang__) +#if __has_feature(address_sanitizer) +#include "rocurilocator.hpp" +#include + +namespace roc { +hsa_status_t UriLocator::createUriRangeTable() { + auto execCb = [] (hsa_executable_t exec, + void *data) -> hsa_status_t { + int execState = 0; + hsa_status_t status; + status = hsa_executable_get_info(exec, HSA_EXECUTABLE_INFO_STATE, &execState); + if (status != HSA_STATUS_SUCCESS) + return status; + if (execState != HSA_EXECUTABLE_STATE_FROZEN) + return status; + + auto loadedCodeObjectCb = [] (hsa_executable_t exec, + hsa_loaded_code_object_t lcobj, void *data) -> hsa_status_t { + hsa_status_t result; + uint64_t loadBAddr = 0, loadSize = 0; + uint32_t uriLen = 0; + int64_t delta = 0; + uint64_t *argsCb = static_cast(data); + hsa_ven_amd_loader_1_03_pfn_t *fnTab = + reinterpret_cast (argsCb[0]); + std::vector *rangeTab = + reinterpret_cast*> (argsCb[1]); + + if (!fnTab->hsa_ven_amd_loader_loaded_code_object_get_info) + return HSA_STATUS_ERROR; + + result = fnTab->hsa_ven_amd_loader_loaded_code_object_get_info(lcobj, + HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_BASE, (void*) &loadBAddr); + if (result != HSA_STATUS_SUCCESS) + return result; + + result = fnTab->hsa_ven_amd_loader_loaded_code_object_get_info(lcobj, + HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_SIZE, (void*) &loadSize); + if (result != HSA_STATUS_SUCCESS) + return result; + + result = fnTab->hsa_ven_amd_loader_loaded_code_object_get_info(lcobj, + HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI_LENGTH, (void*) &uriLen); + if (result != HSA_STATUS_SUCCESS) + return result; + + result = fnTab-> hsa_ven_amd_loader_loaded_code_object_get_info(lcobj, + HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_DELTA, (void*) &delta); + if (result != HSA_STATUS_SUCCESS) + return result; + + char *uri = new char[uriLen+1]; + uri[uriLen] = '\0'; + result = fnTab->hsa_ven_amd_loader_loaded_code_object_get_info(lcobj, + HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI, (void*) uri); + if 
(result != HSA_STATUS_SUCCESS) + return result; + rangeTab->push_back(UriRange{loadBAddr, loadBAddr+loadSize-1, + delta, std::string{uri,uriLen+1}}); + delete[] uri; + return HSA_STATUS_SUCCESS; + }; + + uint64_t *args = static_cast(data); + hsa_ven_amd_loader_1_03_pfn_t *fnExtTab = + reinterpret_cast (args[0]); + return fnExtTab->hsa_ven_amd_loader_executable_iterate_loaded_code_objects(exec, + loadedCodeObjectCb, data); + }; + + if (!fn_table_.hsa_ven_amd_loader_iterate_executables) + return HSA_STATUS_ERROR; + + uint64_t callbackArgs[2] = {(uint64_t)& fn_table_, (uint64_t) &rangeTab_}; + return fn_table_.hsa_ven_amd_loader_iterate_executables(execCb, (void*) callbackArgs); +} + +// Encoding of uniform-resource-identifier(URI) is detailed in +// https://llvm.org/docs/AMDGPUUsage.html#loaded-code-object-path-uniform-resource-identifier-uri +std::pair UriLocator::decodeUriAndGetFd(UriInfo& uri, + amd::Os::FileDesc* uri_fd) { + std::ostringstream ss; + char cur; + uint64_t offset = 0, size = 0; + if (uri.uriPath.size() == 0) + return {0,0}; + auto pos = uri.uriPath.find("//"); + if (pos == std::string::npos || uri.uriPath.substr(0, pos) != "file:") { + uri.uriPath=""; + return {0,0}; + } + auto rspos = uri.uriPath.find('#'); + if (rspos != std::string::npos) { + //parse range specifier + std::string offprefix = "offset=", sizeprefix = "size="; + auto sbeg = uri.uriPath.find('&',rspos); + auto offbeg = rspos + offprefix.size()+1; + std::string offstr = uri.uriPath.substr(offbeg, sbeg - offbeg); + auto sizebeg = sbeg + sizeprefix.size()+1; + std::string sizestr = uri.uriPath.substr(sizebeg, uri.uriPath.size()-sizebeg); + offset = std::stoull(offstr, nullptr, 0); + size = std::stoull(sizestr, nullptr, 0); + rspos -= 1; + } + else { + rspos = uri.uriPath.size()-1; + } + pos += 2; + //decode filepath + for (auto i=pos; i<= rspos;) { + cur = uri.uriPath[i]; + if (isalnum(cur) || cur == '/' || cur == '-' || + cur == '_' || cur == '.' 
|| cur == '~') { + ss << cur; + i++; + } + else { + //characters prefix with '%' char + char tbits = uri.uriPath[i+1], lbits = uri.uriPath[i+2]; + uint8_t t = (tbits < 58) ? ( tbits - 48) : ((tbits - 65) + 10); + uint8_t l = (lbits < 58) ? ( lbits - 48) : ((lbits - 65) + 10); + ss << (char)(((0b00000000 | t)<<4) | l); + i += 3; + } + } + uri.uriPath = ss.str(); + size_t fd_size; + (void) amd::Os::GetFileHandle(uri.uriPath.c_str(), uri_fd, &fd_size); + // As per URI locator syntax, range_specifier is optional + // if range_specifier is absent return total size of the file + // and set offset to begin at 0. + if (size == 0) + size = fd_size; + return {offset, size}; +} + +UriLocator::UriInfo UriLocator::lookUpUri(uint64_t device_pc) { + UriInfo errorstate{"", 0}; + + if (!init_) { + hsa_status_t result; + result = hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_LOADER, 1, + sizeof(fn_table_), &fn_table_); + if (result != HSA_STATUS_SUCCESS) + return errorstate; + result = createUriRangeTable(); + if (result != HSA_STATUS_SUCCESS) { + rangeTab_.clear(); + return errorstate; + } + init_ = true; + } + + for(auto& seg : rangeTab_) + if (seg.startAddr_ <= device_pc && device_pc <= seg.endAddr_) + return UriInfo{seg.Uri_.c_str(), seg.elfDelta_}; + + return errorstate; +} +} //namespace roc +#endif +#endif diff --git a/device/rocm/rocurilocator.hpp b/device/rocm/rocurilocator.hpp new file mode 100644 index 000000000..880b6c72f --- /dev/null +++ b/device/rocm/rocurilocator.hpp @@ -0,0 +1,48 @@ +/* Copyright (c) 2019 - 2021 Advanced Micro Devices, Inc. 
+ + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. */ + +#pragma once +#if defined(__clang__) +#if __has_feature(address_sanitizer) +#include "device/devurilocator.hpp" +#include "hsa_ven_amd_loader.h" + +#include +namespace roc { +class UriLocator : public device::UriLocator { + bool init_ = false; + struct UriRange { + uint64_t startAddr_, endAddr_; + int64_t elfDelta_; + std::string Uri_; + }; + std::vector rangeTab_; + hsa_ven_amd_loader_1_03_pfn_t fn_table_; + + hsa_status_t createUriRangeTable(); + public: + virtual ~UriLocator() {} + virtual UriInfo lookUpUri(uint64_t device_pc) override; + virtual std::pair decodeUriAndGetFd(UriInfo& uri_path, + amd::Os::FileDesc* uri_fd) override; +}; +} +#endif +#endif diff --git a/device/rocm/rocvirtual.cpp b/device/rocm/rocvirtual.cpp index 8e0ffa2bc..db803164a 100644 --- a/device/rocm/rocvirtual.cpp +++ b/device/rocm/rocvirtual.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2013-present Advanced Micro Devices, Inc. 
+/* Copyright (c) 2013 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -110,17 +110,22 @@ static unsigned extractAqlBits(unsigned v, unsigned pos, unsigned width) { // ================================================================================================ void Timestamp::checkGpuTime() { + amd::ScopedLock s(lock_); if (HwProfiling()) { uint64_t start = std::numeric_limits::max(); uint64_t end = 0; for (auto it : signals_) { - if (hsa_signal_load_relaxed(it->signal_) > 0) { + amd::ScopedLock lock(it->LockSignalOps()); + + // Ignore the wait if runtime processes API callback, because the signal value is bigger + // than expected and the value reset will occur after API callback is done + if (GetCallbackSignal().handle == 0) { WaitForSignal(it->signal_); } // Avoid profiling data for the sync barrier, in tiny performance tests the first call // to ROCr is very slow and that also affects the overall performance of the callback thread - if (command().GetBatchHead() == nullptr) { + if (command().GetBatchHead() == nullptr || command().profilingInfo().marker_ts_) { hsa_amd_profiling_dispatch_time_t time = {}; if (it->engine_ == HwQueueEngine::Compute) { hsa_amd_profiling_get_dispatch_time(gpu()->gpu_device(), it->signal_, &time); @@ -130,10 +135,12 @@ void Timestamp::checkGpuTime() { time.start = time_sdma.start; time.end = time_sdma.end; } + start = std::min(time.start, start); end = std::max(time.end, end); + ClPrint(amd::LOG_INFO, amd::LOG_SIG, "Signal = (0x%lx), start = %ld, " + "end = %ld", it->signal_.handle, start, end); } - it->ts_ = nullptr; it->done_ = true; } signals_.clear(); @@ -150,10 +157,11 @@ bool HsaAmdSignalHandler(hsa_signal_value_t value, void* arg) { amd::Thread* thread = amd::Thread::current(); if (!(thread != nullptr || - ((thread = new amd::HostThread()) != nullptr && thread == 
amd::Thread::current()))) { + ((thread = new amd::HostThread()) != nullptr && thread == amd::Thread::current() && + amd::Os::setThreadAffinityToMainThread()))) { return false; } - amd::ScopedLock sl(ts->gpu()->execution()); + if (ts->gpu()->isProfilerAttached()) { amd::Command* head = ts->getParsedCommand(); if (head == nullptr) { @@ -185,9 +193,17 @@ bool HsaAmdSignalHandler(hsa_signal_value_t value, void* arg) { ClPrint(amd::LOG_INFO, amd::LOG_SIG, "Handler: value(%d), timestamp(%p), handle(0x%lx)", static_cast(value), arg, ts->HwProfiling() ? ts->Signals()[0]->signal_.handle : 0); + // Save callback signal + hsa_signal_t callback_signal = ts->GetCallbackSignal(); + // Update the batch, since signal is complete ts->gpu()->updateCommandsState(ts->command().GetBatchHead()); + // Reset API callback signal. It will release AQL queue and start commands processing + if (callback_signal.handle != 0) { + hsa_signal_subtract_relaxed(callback_signal, 1); + } + // Return false, so the callback will not be called again for this signal return false; } @@ -303,16 +319,13 @@ void VirtualGPU::MemoryDependency::clear(bool all) { // ================================================================================================ VirtualGPU::HwQueueTracker::~HwQueueTracker() { for (auto& signal: signal_list_) { - if (signal->signal_.handle != 0) { - hsa_signal_destroy(signal->signal_); - } - delete signal; + signal->release(); } } // ================================================================================================ bool VirtualGPU::HwQueueTracker::Create() { - constexpr size_t kSignalListSize = 16; + constexpr size_t kSignalListSize = 32; signal_list_.resize(kSignalListSize); hsa_agent_t agent = gpu_.gpu_device(); @@ -358,6 +371,26 @@ hsa_signal_t VirtualGPU::HwQueueTracker::ActiveSignal( // a GPU waiter(which may be not triggered yet) and CPU signal reset below WaitNext(); + if (signal_list_[current_id_]->referenceCount() > 1) { + // The signal was assigned to the 
global marker's event, hence runtime can't reuse it + // and needs a new signal + std::unique_ptr signal(new ProfilingSignal()); + if (signal != nullptr) { + hsa_agent_t agent = gpu_.gpu_device(); + const Settings& settings = gpu_.dev().settings(); + hsa_agent_t* agents = (settings.system_scope_signal_) ? nullptr : &agent; + uint32_t num_agents = (settings.system_scope_signal_) ? 0 : 1; + + if (HSA_STATUS_SUCCESS == hsa_signal_create(0, num_agents, agents, &signal->signal_)) { + signal_list_[current_id_]->release(); + signal_list_[current_id_] = signal.release(); + } else { + assert(!"ProfilingSignal reallocaiton failed! Marker has a conflict with signal reuse!"); + } + } else { + assert(!"ProfilingSignal reallocaiton failed! Marker has a conflict with signal reuse!"); + } + } ProfilingSignal* prof_signal = signal_list_[current_id_]; // Reset the signal and return hsa_signal_silent_store_relaxed(prof_signal->signal_, init_val); @@ -366,19 +399,33 @@ hsa_signal_t VirtualGPU::HwQueueTracker::ActiveSignal( if (ts != 0) { // Save HSA signal earlier to make sure the possible callback will have a valid // value for processing + ts->retain(); prof_signal->ts_ = ts; ts->AddProfilingSignal(prof_signal); // If direct dispatch is enabled and the batch head isn't null, then it's a marker and // requires the batch update upon HSA signal completion - if (AMD_DIRECT_DISPATCH && (ts->command().GetBatchHead() != nullptr)) { + if (AMD_DIRECT_DISPATCH && (ts->command().GetBatchHead() != nullptr) && + !ts->command().CpuWaitRequested()) { + uint32_t init_value = kInitSignalValueOne; + // If API callback is enabled, then use a blocking signal for AQL queue. 
+ // HSA signal will be acquired in SW and released after HSA signal callback + if (ts->command().Callback() != nullptr) { + ts->SetCallbackSignal(prof_signal->signal_); + // Blocks AQL queue from further processing + hsa_signal_add_relaxed(prof_signal->signal_, 1); + init_value += 1; + } hsa_status_t result = hsa_amd_signal_async_handler(prof_signal->signal_, - HSA_SIGNAL_CONDITION_LT, kInitSignalValueOne, &HsaAmdSignalHandler, ts); + HSA_SIGNAL_CONDITION_LT, init_value, &HsaAmdSignalHandler, ts); if (HSA_STATUS_SUCCESS != result) { LogError("hsa_amd_signal_async_handler() failed to set the handler!"); } else { ClPrint(amd::LOG_INFO, amd::LOG_SIG, "Set Handler: handle(0x%lx), timestamp(%p)", - prof_signal->signal_.handle, prof_signal); + prof_signal->signal_.handle, prof_signal); } + // Update the current command/marker with HW event + prof_signal->retain(); + ts->command().SetHwEvent(prof_signal); } if (!sdma_profiling_) { hsa_amd_profiling_async_copy_enable(true); @@ -391,12 +438,12 @@ hsa_signal_t VirtualGPU::HwQueueTracker::ActiveSignal( // ================================================================================================ std::vector& VirtualGPU::HwQueueTracker::WaitingSignal(HwQueueEngine engine) { bool explicit_wait = false; - // Rest all current waiting signals + // Reset all current waiting signals waiting_signals_.clear(); // Does runtime switch the active engine? if (engine != engine_) { - // Yes, return the signla from the previous operation for a wait + // Yes, return the signal from the previous operation for a wait engine_ = engine; explicit_wait = true; } else { @@ -404,8 +451,8 @@ std::vector& VirtualGPU::HwQueueTracker::WaitingSignal(HwQueueEngi if (engine == HwQueueEngine::Unknown) { explicit_wait = true; } else { - // Check if skip wait optimizaiton is enabled. It will try to predice the same engine in ROCr - // and ignore signal wait, relying on in-order engine execution + // Check if skip wait optimization is enabled. 
It will try to predict the same engine in ROCr + // and ignore the signal wait, relying on in-order engine execution const Settings& settings = gpu_.dev().settings(); if (!settings.skip_copy_sync_ && (engine != HwQueueEngine::Compute)) { explicit_wait = true; @@ -414,24 +461,33 @@ std::vector& VirtualGPU::HwQueueTracker::WaitingSignal(HwQueueEngi } // Check if a wait is required if (explicit_wait) { - ProfilingSignal** prof_signal; + bool skip_internal_signal = false; + + for (uint32_t i = 0; i < external_signals_.size(); ++i) { + // If external signal matches internal one, then skip it + if (external_signals_[i]->signal_.handle == + signal_list_[current_id_]->signal_.handle) { + skip_internal_signal = true; + } + } // Add the oldest signal into the tracking for a wait - external_signals_.push_back(signal_list_[current_id_]); - prof_signal = &external_signals_[0]; + if (!skip_internal_signal) { + external_signals_.push_back(signal_list_[current_id_]); + } // Validate all signals for the wait and skip already completed for (uint32_t i = 0; i < external_signals_.size(); ++i) { // Early signal status check - if (hsa_signal_load_relaxed(prof_signal[i]->signal_) > 0) { + if (hsa_signal_load_relaxed(external_signals_[i]->signal_) > 0) { const Settings& settings = gpu_.dev().settings(); - // Actively wait on CPU for 750 us to avoid extra overheads of signal tracking on GPU - if (!WaitForSignal(prof_signal[i]->signal_)) { + // Actively wait on CPU to avoid extra overheads of signal tracking on GPU + if (!WaitForSignal(external_signals_[i]->signal_)) { if (settings.cpu_wait_for_signal_) { // Wait on CPU for completion if requested - CpuWaitForSignal(prof_signal[i]); + CpuWaitForSignal(external_signals_[i]); } else { // Add HSA signal for tracking on GPU - waiting_signals_.push_back(prof_signal[i]->signal_); + waiting_signals_.push_back(external_signals_[i]->signal_); } } } @@ -445,19 +501,21 @@ std::vector& VirtualGPU::HwQueueTracker::WaitingSignal(HwQueueEngi // 
================================================================================================ bool VirtualGPU::HwQueueTracker::CpuWaitForSignal(ProfilingSignal* signal) { // Wait for the current signal - if (!signal->done_) { + if (signal->ts_ != nullptr) { // Update timestamp values if requested - if (signal->ts_ != nullptr) { - signal->ts_->checkGpuTime(); - } else { - ClPrint(amd::LOG_DEBUG, amd::LOG_COPY, "[%zx]!\t Host wait on completion_signal=0x%zx", - std::this_thread::get_id(), signal->signal_.handle); - if (!WaitForSignal(signal->signal_)) { - LogPrintfError("Failed signal [0x%lx] wait", signal->signal_); - return false; - } - signal->done_ = true; + auto ts = signal->ts_; + ts->checkGpuTime(); + ts->release(); + signal->ts_ = nullptr; + } else if (!signal->done_) { + amd::ScopedLock lock(signal->LockSignalOps()); + ClPrint(amd::LOG_DEBUG, amd::LOG_COPY, "[%zx]!\t Host wait on completion_signal=0x%zx", + std::this_thread::get_id(), signal->signal_.handle); + if (!WaitForSignal(signal->signal_, gpu_.ActiveWait())) { + LogPrintfError("Failed signal [0x%lx] wait", signal->signal_); + return false; } + signal->done_ = true; } return true; } @@ -891,12 +949,6 @@ void VirtualGPU::dispatchBarrierPacket(uint16_t packetHeader, bool skipSignal) { __atomic_store_n(reinterpret_cast(aql_loc), packetHeader, __ATOMIC_RELEASE); hsa_signal_store_screlease(gpu_queue_->doorbell_signal, index); - // Clear dependent signals for the next packet - barrier_packet_.dep_signal[0] = hsa_signal_t{}; - barrier_packet_.dep_signal[1] = hsa_signal_t{}; - barrier_packet_.dep_signal[2] = hsa_signal_t{}; - barrier_packet_.dep_signal[3] = hsa_signal_t{}; - barrier_packet_.dep_signal[4] = hsa_signal_t{}; ClPrint(amd::LOG_DEBUG, amd::LOG_AQL, "[%zx] HWq=0x%zx, BarrierAND Header = 0x%x (type=%d, barrier=%d, acquire=%d," " release=%d), " @@ -913,6 +965,12 @@ void VirtualGPU::dispatchBarrierPacket(uint16_t packetHeader, bool skipSignal) { barrier_packet_.dep_signal[0], 
barrier_packet_.dep_signal[1], barrier_packet_.dep_signal[2], barrier_packet_.dep_signal[3], barrier_packet_.dep_signal[4], barrier_packet_.completion_signal); + // Clear dependent signals for the next packet + barrier_packet_.dep_signal[0] = hsa_signal_t{}; + barrier_packet_.dep_signal[1] = hsa_signal_t{}; + barrier_packet_.dep_signal[2] = hsa_signal_t{}; + barrier_packet_.dep_signal[3] = hsa_signal_t{}; + barrier_packet_.dep_signal[4] = hsa_signal_t{}; } // ================================================================================================ @@ -1023,7 +1081,7 @@ VirtualGPU::~VirtualGPU() { releasePinnedMem(); if (timestamp_ != nullptr) { - delete timestamp_; + timestamp_->release(); timestamp_ = nullptr; LogError("There was a timestamp that was not used; deleting."); } @@ -1179,7 +1237,8 @@ void VirtualGPU::profilingBegin(amd::Command& command, bool drmProfiling) { (*it)->NotifyEvent()->HwEvent() : (*it)->HwEvent(); if (hw_event != nullptr) { Barriers().AddExternalSignal(reinterpret_cast(hw_event)); - } else if (static_cast(*it)->queue() != command.queue()) { + } else if (static_cast(*it)->queue() != command.queue() && + ((*it)->status() != CL_COMPLETE)) { LogPrintfError("Waiting event(%p) doesn't have a HSA signal!\n", *it); } else { // Assume serialization on the same queue... 
@@ -1200,9 +1259,6 @@ void VirtualGPU::profilingEnd(amd::Command& command) { timestamp_->end(); } command.setData(timestamp_); - if (AMD_DIRECT_DISPATCH) { - command.SetHwEvent(timestamp_->Signals().back()); - } timestamp_ = nullptr; } } @@ -1261,7 +1317,7 @@ void VirtualGPU::updateCommandsState(amd::Command* list) const { ts = reinterpret_cast(current->data()); startTimeStamp = ts->getStart(); endTimeStamp = ts->getEnd(); - delete ts; + ts->release(); current->setData(nullptr); } else { // If we don't have a command that contains a valid timestamp, @@ -2253,6 +2309,9 @@ void VirtualGPU::submitStreamOperation(amd::StreamOperationCommand& cmd) { amd::Coord3D size(sizeBytes); bool entire = amdMemory->isEntirelyCovered(origin, size); + // Ensure memory ordering preceding the write + dispatchBarrierPacket(kBarrierPacketReleaseHeader); + // Use GPU Blit to write bool result = blitMgr().fillBuffer(*memory, &value, sizeBytes, origin, size, entire, true); ClPrint(amd::LOG_DEBUG, amd::LOG_COPY, "Writting value: 0x%lx", value); @@ -2863,7 +2922,7 @@ void VirtualGPU::submitKernel(amd::NDRangeKernelCommand& vcmd) { queue->profilingEnd(vcmd); } else { - // Make sure VirtualGPU has an exclusive access to the resources + // Make sure VirtualGPU has an exclusive access to the resources amd::ScopedLock lock(execution()); profilingBegin(vcmd); @@ -2886,19 +2945,24 @@ void VirtualGPU::submitNativeFn(amd::NativeFnCommand& cmd) { // ================================================================================================ void VirtualGPU::submitMarker(amd::Marker& vcmd) { - if (vcmd.profilingInfo().marker_ts_) { - profilingBegin(vcmd); - if (timestamp_ != nullptr) { - // If there was a pending dispatch use a Barrier packet - // with cache flushes. 
This saves on additional barrier - // for cache flushes explicitly and helps wall time - dispatchBarrierPacket(kNopPacketHeader); - // Direct dispatch requires a barrier with callback and hasPendingDispatch_ triggers that - if (AMD_DIRECT_DISPATCH) { - hasPendingDispatch_ = true; + if (AMD_DIRECT_DISPATCH || vcmd.profilingInfo().marker_ts_) { + // Make sure VirtualGPU has an exclusive access to the resources + amd::ScopedLock lock(execution()); + if (vcmd.CpuWaitRequested()) { + // It should be safe to call flush directly if there are not pending dispatches without + // HSA signal callback + flush(vcmd.GetBatchHead()); + } else { + profilingBegin(vcmd); + if (timestamp_ != nullptr) { + // Submit a barrier with a cache flushes. + dispatchBarrierPacket(kBarrierPacketHeader, false); + + hasPendingDispatch_ = false; } + profilingEnd(vcmd); } - profilingEnd(vcmd); + } } @@ -2922,45 +2986,13 @@ void VirtualGPU::submitReleaseExtObjects(amd::ReleaseExtObjectsCommand& vcmd) { // ================================================================================================ void VirtualGPU::flush(amd::Command* list, bool wait) { - // Direct dispatch relies on HSA signal callback - bool skip_cpu_wait = AMD_DIRECT_DISPATCH; - - if (skip_cpu_wait) { - // Search for the last command in the batch to track GPU state - amd::Command* current = list; - assert(current != nullptr && "Empty batch for processing!"); - - // Find the last command - while (current->getNext() != nullptr) { - current = current->getNext(); - } - // Always insert a barrier. Some tests rquire async SDMA wait - hasPendingDispatch_ = true; - // Enable profiling, so runtime can track TS - profilingBegin(*current); - - // If runtime didn't submit a barrier, then it can't track the completion of the batch. - // Hence runtime either has to insert a barrier unconditionally or have a CPU wait. - // Due to performance impact of extra barriers CPU wait is selected. 
- // Note: if callback will be selected to update the batch status, - // then the host thread can't update it also, otherwise double free may occur - skip_cpu_wait &= hasPendingDispatch_; - - releaseGpuMemoryFence(skip_cpu_wait); - profilingEnd(*current); - } else { - // If barrier is requested, then wait for everything, otherwise - // a per disaptch wait will occur later in updateCommandsState() - releaseGpuMemoryFence(); - } - - // If CPU waited for GPU, then the queue is idle - if (!skip_cpu_wait) { - updateCommandsState(list); + // If barrier is requested, then wait for everything, otherwise + // a per disaptch wait will occur later in updateCommandsState() + releaseGpuMemoryFence(); + updateCommandsState(list); - // Release all pinned memory - releasePinnedMem(); - } + // Release all pinned memory + releasePinnedMem(); } // ================================================================================================ diff --git a/device/rocm/rocvirtual.hpp b/device/rocm/rocvirtual.hpp index 85987083b..f1efa435c 100644 --- a/device/rocm/rocvirtual.hpp +++ b/device/rocm/rocvirtual.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -34,41 +34,41 @@ namespace roc { class Device; class Memory; +struct ProfilingSignal; class Timestamp; -struct ProfilingSignal : public amd::HeapObject { - hsa_signal_t signal_; //!< HSA signal to track profiling information - Timestamp* ts_; //!< Timestamp object associated with the signal - HwQueueEngine engine_; //!< Engine used with this signal - bool done_; //!< True if signal is done - ProfilingSignal() - : ts_(nullptr) - , engine_(HwQueueEngine::Compute) - , done_(true) - { signal_.handle = 0; } -}; - // Initial HSA signal value constexpr static hsa_signal_value_t kInitSignalValueOne = 1; // Timeouts for HSA signal wait -constexpr static uint64_t kTimeout100us = 100000; -constexpr static uint64_t kTimeout750us = 750000; +constexpr static uint64_t kTimeout100us = 100 * K; constexpr static uint64_t kUnlimitedWait = std::numeric_limits::max(); -template -inline bool WaitForSignal(hsa_signal_t signal) { - if (wait_time != 0) { - if (hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, kInitSignalValueOne, - wait_time, HSA_WAIT_STATE_ACTIVE) != 0) { - return false; +template +inline bool WaitForSignal(hsa_signal_t signal, bool active_wait = false) { + if (hsa_signal_load_relaxed(signal) > 0) { + uint64_t timeout = kTimeout100us; + if (active_wait) { + timeout = kUnlimitedWait; + } else if (active_wait_timeout) { + timeout = ROC_ACTIVE_WAIT_TIMEOUT * K; + if (timeout == 0) { + return false; + } } - } else { - uint64_t timeout = (ROC_ACTIVE_WAIT) ? 
kUnlimitedWait : kTimeout100us; + + ClPrint(amd::LOG_INFO, amd::LOG_SIG, "Host active wait for Signal = (0x%lx) for %d ns", + signal.handle, timeout); // Active wait with a timeout if (hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, kInitSignalValueOne, timeout, HSA_WAIT_STATE_ACTIVE) != 0) { + if (active_wait_timeout) { + return false; + } + ClPrint(amd::LOG_INFO, amd::LOG_SIG, "Host blocked wait for Signal = (0x%lx)", + signal.handle); + // Wait until the completion with CPU suspend if (hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, kInitSignalValueOne, kUnlimitedWait, HSA_WAIT_STATE_BLOCKED) != 0) { @@ -76,29 +76,37 @@ inline bool WaitForSignal(hsa_signal_t signal) { } } } + return true; } // Timestamp for keeping track of some profiling information for various commands // including EnqueueNDRangeKernel and clEnqueueCopyBuffer. -class Timestamp : public amd::HeapObject { +class Timestamp : public amd::ReferenceCountedObject { private: static double ticksToTime_; uint64_t start_; uint64_t end_; VirtualGPU* gpu_; //!< Virtual GPU, associated with this timestamp - const amd::Command& command_; //!< Command, associated with this timestamp + amd::Command& command_; ///!< Command, associated with this timestamp amd::Command* parsedCommand_; //!< Command down the list, considering command_ as head - std::vector signals_; + std::vector signals_; //!< The list of all signals, associated with the TS + hsa_signal_t callback_signal_; //!< Signal associated with a callback for possible later update + amd::Monitor lock_; //!< Serialize timestamp update + + Timestamp(const Timestamp&) = delete; + Timestamp& operator=(const Timestamp&) = delete; public: - Timestamp(VirtualGPU* gpu, const amd::Command& command) + Timestamp(VirtualGPU* gpu, amd::Command& command) : start_(std::numeric_limits::max()) , end_(0) , gpu_(gpu) , command_(command) - , parsedCommand_(nullptr) {} + , parsedCommand_(nullptr) + , callback_signal_(hsa_signal_t{}) + , lock_("Timestamp 
lock", true) {} ~Timestamp() {} @@ -125,13 +133,20 @@ class Timestamp : public amd::HeapObject { void start() { start_ = amd::Os::timeNanos(); } // End a timestamp (get timestamp from OS) - void end() { end_ = amd::Os::timeNanos(); } + void end() { + // Timestamp value can be updated by HW profiling if current command had a stall. + // Although CPU TS should be still valid in this situation, there are cases in VM mode + // when CPU timeline is out of sync with GPU timeline and shifted time can be reported + if (end_ == 0) { + end_ = amd::Os::timeNanos(); + } + } static void setGpuTicksToTime(double ticksToTime) { ticksToTime_ = ticksToTime; } static double getGpuTicksToTime() { return ticksToTime_; } //! Returns amd::command assigned to this timestamp - const amd::Command& command() const { return command_; } + amd::Command& command() const { return command_; } //! Sets the parsed command void setParsedCommand(amd::Command* command) { parsedCommand_ = command; } @@ -141,6 +156,14 @@ class Timestamp : public amd::HeapObject { //! Returns virtual GPU device, used with this timestamp VirtualGPU* gpu() const { return gpu_; } + + //! Updates the callback signal + void SetCallbackSignal(hsa_signal_t callback_signal) { + callback_signal_ = callback_signal; + } + + //! Returns the callback signal + hsa_signal_t GetCallbackSignal() const { return callback_signal_; } }; class VirtualGPU : public device::VirtualDevice { @@ -197,13 +220,12 @@ class VirtualGPU : public device::VirtualDevice { //! Wait for the curent active signal. Can idle the queue bool WaitCurrent() { ProfilingSignal* signal = signal_list_[current_id_]; - ClPrint(amd::LOG_DEBUG, amd::LOG_MISC, "[%zx]!\t WaitCurret completion_signal=0x%zx", - std::this_thread::get_id(), signal->signal_.handle); return CpuWaitForSignal(signal); } //! 
Update current active engine void SetActiveEngine(HwQueueEngine engine = HwQueueEngine::Compute) { engine_ = engine; } + HwQueueEngine GetActiveEngine() const { return engine_; } //! Returns the last submitted signal for a wait std::vector& WaitingSignal(HwQueueEngine engine = HwQueueEngine::Compute); @@ -225,8 +247,6 @@ class VirtualGPU : public device::VirtualDevice { void WaitNext() { size_t next = (current_id_ + 1) % signal_list_.size(); ProfilingSignal* signal = signal_list_[next]; - ClPrint(amd::LOG_DEBUG, amd::LOG_MISC, "[%zx]!\t WaitNext completion_signal=0x%zx", - std::this_thread::get_id(), signal->signal_.handle); CpuWaitForSignal(signal); } diff --git a/elf/elf.cpp b/elf/elf.cpp index 0e08feed0..3dfe41989 100644 --- a/elf/elf.cpp +++ b/elf/elf.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2010-present Advanced Micro Devices, Inc. +/* Copyright (c) 2010 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/elf/elf.hpp b/elf/elf.hpp index 15268f108..14220ad4b 100644 --- a/elf/elf.hpp +++ b/elf/elf.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/elf/elfio/elf_types.hpp b/elf/elfio/elf_types.hpp index 966b3a116..5a286e09c 100644 --- a/elf/elfio/elf_types.hpp +++ b/elf/elfio/elf_types.hpp @@ -1,6 +1,6 @@ /* Copyright (C) 2001-2015 by Serge Lamikhov-Center -Modifications Copyright (C) 2020-2021 Advanced Micro Devices, Inc. +Modifications Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/elf/elfio/elfio.hpp b/elf/elfio/elfio.hpp index 3ed1fb0e8..2985f79e1 100644 --- a/elf/elfio/elfio.hpp +++ b/elf/elfio/elfio.hpp @@ -1,6 +1,6 @@ /* Copyright (C) 2001-2015 by Serge Lamikhov-Center -Modifications Copyright (C) 2020-2021 Advanced Micro Devices, Inc. +Modifications Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/elf/elfio/elfio_dump.hpp b/elf/elfio/elfio_dump.hpp index c70bf538b..86d70b58b 100644 --- a/elf/elfio/elfio_dump.hpp +++ b/elf/elfio/elfio_dump.hpp @@ -1,6 +1,6 @@ /* Copyright (C) 2001-2015 by Serge Lamikhov-Center -Modifications Copyright (C) 2020-2021 Advanced Micro Devices, Inc. +Modifications Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/elf/elfio/elfio_dynamic.hpp b/elf/elfio/elfio_dynamic.hpp index 87cf62d55..9350c84ed 100644 --- a/elf/elfio/elfio_dynamic.hpp +++ b/elf/elfio/elfio_dynamic.hpp @@ -1,6 +1,6 @@ /* Copyright (C) 2001-2015 by Serge Lamikhov-Center -Modifications Copyright (C) 2020-2021 Advanced Micro Devices, Inc. +Modifications Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/elf/elfio/elfio_header.hpp b/elf/elfio/elfio_header.hpp index f061fecc8..c1697b22f 100644 --- a/elf/elfio/elfio_header.hpp +++ b/elf/elfio/elfio_header.hpp @@ -1,6 +1,6 @@ /* Copyright (C) 2001-2015 by Serge Lamikhov-Center -Modifications Copyright (C) 2020-2021 Advanced Micro Devices, Inc. +Modifications Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/elf/elfio/elfio_note.hpp b/elf/elfio/elfio_note.hpp index d054906f1..725294eaa 100644 --- a/elf/elfio/elfio_note.hpp +++ b/elf/elfio/elfio_note.hpp @@ -1,6 +1,6 @@ /* Copyright (C) 2001-2015 by Serge Lamikhov-Center -Modifications Copyright (C) 2020-2021 Advanced Micro Devices, Inc. +Modifications Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/elf/elfio/elfio_relocation.hpp b/elf/elfio/elfio_relocation.hpp index b6aa16016..18804b8d7 100644 --- a/elf/elfio/elfio_relocation.hpp +++ b/elf/elfio/elfio_relocation.hpp @@ -1,6 +1,6 @@ /* Copyright (C) 2001-2015 by Serge Lamikhov-Center -Modifications Copyright (C) 2020-2021 Advanced Micro Devices, Inc. +Modifications Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/elf/elfio/elfio_section.hpp b/elf/elfio/elfio_section.hpp index 22e0f5791..21ca4bc24 100644 --- a/elf/elfio/elfio_section.hpp +++ b/elf/elfio/elfio_section.hpp @@ -1,6 +1,6 @@ /* Copyright (C) 2001-2015 by Serge Lamikhov-Center -Modifications Copyright (C) 2020-2021 Advanced Micro Devices, Inc. +Modifications Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/elf/elfio/elfio_segment.hpp b/elf/elfio/elfio_segment.hpp index 3de7e2278..f35412497 100644 --- a/elf/elfio/elfio_segment.hpp +++ b/elf/elfio/elfio_segment.hpp @@ -1,6 +1,6 @@ /* Copyright (C) 2001-2015 by Serge Lamikhov-Center -Modifications Copyright (C) 2020-2021 Advanced Micro Devices, Inc. +Modifications Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/elf/elfio/elfio_strings.hpp b/elf/elfio/elfio_strings.hpp index cda5bd7c4..88a45e5d7 100644 --- a/elf/elfio/elfio_strings.hpp +++ b/elf/elfio/elfio_strings.hpp @@ -1,6 +1,6 @@ /* Copyright (C) 2001-2015 by Serge Lamikhov-Center -Modifications Copyright (C) 2020-2021 Advanced Micro Devices, Inc. +Modifications Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/elf/elfio/elfio_symbols.hpp b/elf/elfio/elfio_symbols.hpp index 65482d30b..96ad16d3b 100644 --- a/elf/elfio/elfio_symbols.hpp +++ b/elf/elfio/elfio_symbols.hpp @@ -1,6 +1,6 @@ /* Copyright (C) 2001-2015 by Serge Lamikhov-Center -Modifications Copyright (C) 2020-2021 Advanced Micro Devices, Inc. +Modifications Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/elf/elfio/elfio_utils.hpp b/elf/elfio/elfio_utils.hpp index 6dd980c3c..6482da2dc 100644 --- a/elf/elfio/elfio_utils.hpp +++ b/elf/elfio/elfio_utils.hpp @@ -1,6 +1,6 @@ /* Copyright (C) 2001-2015 by Serge Lamikhov-Center -Modifications Copyright (C) 2020-2021 Advanced Micro Devices, Inc. +Modifications Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/elf/test/CMakeLists.txt b/elf/test/CMakeLists.txt index 8ed8d020b..8326176b0 100644 --- a/elf/test/CMakeLists.txt +++ b/elf/test/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (C) 2020-2021 Advanced Micro Devices, Inc. All Rights Reserved. +# Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All Rights Reserved. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/elf/test/main.cpp b/elf/test/main.cpp index fed06c6a5..bc4d5f625 100644 --- a/elf/test/main.cpp +++ b/elf/test/main.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2020-2021 Advanced Micro Devices, Inc. All Rights Reserved. +/* Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. 
All Rights Reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/include/top.hpp b/include/top.hpp index 531f92664..f51db7d1e 100644 --- a/include/top.hpp +++ b/include/top.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/include/vdi_agent_amd.h b/include/vdi_agent_amd.h index 91d8250db..ffc86697e 100644 --- a/include/vdi_agent_amd.h +++ b/include/vdi_agent_amd.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2010-present Advanced Micro Devices, Inc. +/* Copyright (c) 2010 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/include/vdi_common.hpp b/include/vdi_common.hpp index feb73288e..c4dbf023a 100644 --- a/include/vdi_common.hpp +++ b/include/vdi_common.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2020-present Advanced Micro Devices, Inc. +/* Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/os/alloc.cpp b/os/alloc.cpp index 6caaea437..ac4f590ff 100644 --- a/os/alloc.cpp +++ b/os/alloc.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/os/alloc.hpp b/os/alloc.hpp index d03930af4..7c1729080 100644 --- a/os/alloc.hpp +++ b/os/alloc.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/os/os.cpp b/os/os.cpp index d9cd1d35c..f5ea547dd 100644 --- a/os/os.cpp +++ b/os/os.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/os/os.hpp b/os/os.hpp index 989363fc0..923b0f450 100644 --- a/os/os.hpp +++ b/os/os.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -121,7 +121,7 @@ class Os : AllStatic { // Given a valid file name, returns mmapped memory with the mapped size. 
static bool MemoryMapFile(const char* fname, const void** mmap_ptr, size_t* mmap_size); - // Given a valid file name amd mapped size, returns ftruncated mmaped memory + // Given a valid file name and mapped size, returns ftruncated mmaped memory static bool MemoryMapFileTruncated(const char* fname, const void** mmap_ptr, size_t mmap_size); // Given a valid mmaped ptr with correct size, unmaps the ptr from memory @@ -181,6 +181,9 @@ class Os : AllStatic { static void setThreadAffinity(const void* handle, const ThreadAffinityMask& mask); //! Set the currently running thread's name. static void setCurrentThreadName(const char* name); + //! Set current thread's affinity to that of main thread + static bool setThreadAffinityToMainThread(); + //! Check if the thread is alive static bool isThreadAlive(const Thread& osThread); diff --git a/os/os_posix.cpp b/os/os_posix.cpp index 2d5b89a58..04b2f71a7 100644 --- a/os/os_posix.cpp +++ b/os/os_posix.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -131,6 +131,7 @@ static pthread_setaffinity_fn pthread_setaffinity_fptr; static void init() __attribute__((constructor(101))); static void init() { Os::init(); } +static cpu_set_t nativeMask_; bool Os::installSigfpeHandler() { // Install a SIGFPE signal handler @todo: Chain the handlers @@ -160,6 +161,7 @@ bool Os::init() { pageSize_ = (size_t)::sysconf(_SC_PAGESIZE); processorCount_ = ::sysconf(_SC_NPROCESSORS_CONF); + pthread_getaffinity_np(pthread_self(), sizeof(cpu_set_t), &nativeMask_); pthread_setaffinity_fptr = (pthread_setaffinity_fn)dlsym(RTLD_NEXT, "pthread_setaffinity_np"); return Thread::init(); @@ -392,13 +394,20 @@ const void* Os::createOsThread(amd::Thread* thread) { return reinterpret_cast(handle); } - void Os::setThreadAffinity(const void* handle, const Os::ThreadAffinityMask& mask) { if (pthread_setaffinity_fptr != NULL) { pthread_setaffinity_fptr((pthread_t)handle, sizeof(cpu_set_t), &mask.mask_); } } +bool Os::setThreadAffinityToMainThread() { + if (AMD_CPU_AFFINITY) { + ClPrint(amd::LOG_INFO, amd::LOG_INIT, "Setting Affinity to the main thread's affinity"); + pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &nativeMask_); + } + return true; +} + void Os::yield() { ::sched_yield(); } uint64_t Os::timeNanos() { diff --git a/os/os_win32.cpp b/os/os_win32.cpp index 327bd0ecf..499aa694b 100644 --- a/os/os_win32.cpp +++ b/os/os_win32.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -341,6 +341,9 @@ void Os::setThreadAffinity(const void* handle, const Os::ThreadAffinityMask& mas } } +bool Os::setThreadAffinityToMainThread() { + return true; +} void Os::yield() { ::SwitchToThread(); } uint64_t Os::timeNanos() { diff --git a/os/setjmp.S b/os/setjmp.S index 56424147c..f88a64131 100644 --- a/os/setjmp.S +++ b/os/setjmp.S @@ -1,4 +1,4 @@ - # Copyright (c) 2008-present Advanced Micro Devices, Inc. + # Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. #Permission is hereby granted, free of charge, to any person obtaining a copy #of this software and associated documentation files (the "Software"), to deal diff --git a/os/setjmp.asm b/os/setjmp.asm index a6e16f8e2..4fcb60eab 100644 --- a/os/setjmp.asm +++ b/os/setjmp.asm @@ -1,4 +1,4 @@ - ; Copyright (c) 2008-present Advanced Micro Devices, Inc. + ; Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. ;Permission is hereby granted, free of charge, to any person obtaining a copy ;of this software and associated documentation files (the "Software"), to deal diff --git a/platform/activity.cpp b/platform/activity.cpp index 34a9f6b23..2e19ef273 100644 --- a/platform/activity.cpp +++ b/platform/activity.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2019-present Advanced Micro Devices, Inc. +/* Copyright (c) 2019 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/platform/activity.hpp b/platform/activity.hpp index 5c28fb396..bcda9e93e 100644 --- a/platform/activity.hpp +++ b/platform/activity.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2019-present Advanced Micro Devices, Inc. +/* Copyright (c) 2019 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/platform/agent.cpp b/platform/agent.cpp index 46bce0b61..cc7b107a1 100644 --- a/platform/agent.cpp +++ b/platform/agent.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2010-present Advanced Micro Devices, Inc. +/* Copyright (c) 2010 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/platform/agent.hpp b/platform/agent.hpp index 77c046d5d..ed4b8f2ea 100644 --- a/platform/agent.hpp +++ b/platform/agent.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2010-present Advanced Micro Devices, Inc. +/* Copyright (c) 2010 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/platform/command.cpp b/platform/command.cpp index b8dfb244a..f4567f417 100644 --- a/platform/command.cpp +++ b/platform/command.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -48,6 +48,7 @@ Event::Event(HostQueue& queue) status_(CL_INT_MAX), hw_event_(nullptr), notify_event_(nullptr), + device_(&queue.device()), profilingInfo_(IS_PROFILER_ON || queue.properties().test(CL_QUEUE_PROFILING_ENABLE) || Agent::shouldPostEventEvents()) { notified_.clear(); @@ -55,7 +56,7 @@ Event::Event(HostQueue& queue) // ================================================================================================ Event::Event() : callbacks_(NULL), status_(CL_SUBMITTED), - hw_event_(nullptr), notify_event_(nullptr) { notified_.clear(); } + hw_event_(nullptr), notify_event_(nullptr), device_(nullptr) { notified_.clear(); } // ================================================================================================ Event::~Event() { @@ -65,6 +66,14 @@ Event::~Event() { delete callback; callback = next; } + // Release the notify event + if (notify_event_ != nullptr) { + notify_event_->release(); + } + // Destroy global HW event if available + if ((hw_event_ != nullptr) && (device_ != nullptr)) { + device_->ReleaseGlobalSignal(hw_event_); + } } // ================================================================================================ @@ -141,7 +150,9 @@ bool Event::setStatus(int32_t status, uint64_t timeStamp) { if (status <= CL_COMPLETE) { // Before we notify the waiters that this event reached the CL_COMPLETE // status, we release all the resources associated with this instance. - releaseResources(); + if (!IS_HIP) { + releaseResources(); + } activity_.ReportEventTimestamps(command()); // Broadcast all the waiters. 
@@ -221,11 +232,12 @@ void Event::processCallbacks(int32_t status) const { } } +static constexpr bool kCpuWait = true; // ================================================================================================ bool Event::awaitCompletion() { if (status() > CL_COMPLETE) { - // Notifies current command queue about waiting - if (!notifyCmdQueue()) { + // Notifies the current command queue about waiting + if (!notifyCmdQueue(kCpuWait)) { return false; } @@ -251,30 +263,37 @@ bool Event::awaitCompletion() { } // ================================================================================================ -bool Event::notifyCmdQueue() { +bool Event::notifyCmdQueue(bool cpu_wait) { HostQueue* queue = command().queue(); - if ((status() > CL_COMPLETE) && (nullptr != queue) && - (!AMD_DIRECT_DISPATCH || - // Don't need to notify any marker with direct dispatch, - // because all markers are blocking. - ((command().type() != CL_COMMAND_MARKER) && - (command().type() != 0)) || - // Don't need to notify if the current batch is empty, - // because that means the command was processed and extra notification - // will cause a stall on the host. - (queue->GetSubmittionBatch() != nullptr)) && + if (AMD_DIRECT_DISPATCH) { + ScopedLock l(notify_lock_); + if ((status() > CL_COMPLETE) && (nullptr != queue) && + // If HW event was assigned, then notification can be ignored, since a barrier was issued + (HwEvent() == nullptr) && !notified_.test_and_set()) { - // Make sure the queue is draining the enqueued commands. - amd::Command* command = new amd::Marker(*queue, false, nullWaitList, this); - if (command == NULL) { - notified_.clear(); - return false; + // Make sure the queue is draining the enqueued commands. 
+ amd::Command* command = new amd::Marker(*queue, false, nullWaitList, this, cpu_wait); + if (command == NULL) { + notified_.clear(); + return false; + } + ClPrint(LOG_DEBUG, LOG_CMD, "queue marker to command queue: %p", queue); + command->enqueue(); + // Save notification, associated with the current event + notify_event_ = command; + } + } else { + if ((status() > CL_COMPLETE) && (nullptr != queue) && !notified_.test_and_set()) { + // Make sure the queue is draining the enqueued commands. + amd::Command* command = new amd::Marker(*queue, false, nullWaitList, this); + if (command == NULL) { + notified_.clear(); + return false; + } + ClPrint(LOG_DEBUG, LOG_CMD, "queue marker to command queue: %p", queue); + command->enqueue(); + command->release(); } - ClPrint(LOG_DEBUG, LOG_CMD, "queue marker to command queue: %p", queue); - command->enqueue(); - command->release(); - // Save notification, associated with the current event - notify_event_ = command; } return true; } @@ -319,23 +338,26 @@ void Command::enqueue() { // update will occur later after flush() with a wait if (AMD_DIRECT_DISPATCH) { setStatus(CL_QUEUED); + // Notify all commands about the waiter. Barrier will be sent in order to obtain // HSA signal for a wait on the current queue std::for_each(eventWaitList().begin(), eventWaitList().end(), - std::mem_fun(&Command::notifyCmdQueue)); + std::bind2nd(std::mem_fun(&Command::notifyCmdQueue), !kCpuWait)); // The batch update must be lock protected to avoid a race condition // when multiple threads submit/flush/update the batch at the same time ScopedLock sl(queue_->vdev()->execution()); queue_->FormSubmissionBatch(this); - if ((type() == CL_COMMAND_MARKER || type() == 0) && !profilingInfo().marker_ts_) { + if ((type() == CL_COMMAND_MARKER || type() == 0)) { // The current HSA signal tracking logic requires profiling enabled for the markers EnableProfiling(); // Update batch head for the current marker. 
Hence the status of all commands can be // updated upon the marker completion SetBatchHead(queue_->GetSubmittionBatch()); - // Flush the current batch, but skip the wait on CPU if possible to avoid a stall - queue_->vdev()->flush(queue_->GetSubmittionBatch()); + + setStatus(CL_SUBMITTED); + submit(*queue_->vdev()); + // The batch will be tracked with the marker now queue_->ResetSubmissionBatch(); } else { diff --git a/platform/command.hpp b/platform/command.hpp index 21bce75a0..838737a53 100644 --- a/platform/command.hpp +++ b/platform/command.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2010-present Advanced Micro Devices, Inc. +/* Copyright (c) 2010 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -44,7 +44,8 @@ #include "platform/activity.hpp" #include "platform/command_utils.hpp" -#include "CL/cl_ext.h" +#include +#include #include #include @@ -89,12 +90,14 @@ class Event : public RuntimeObject { private: Monitor lock_; + Monitor notify_lock_; //!< Lock used for notification with direct dispatch only std::atomic callbacks_; //!< linked list of callback entries. std::atomic status_; //!< current execution status. std::atomic_flag notified_; //!< Command queue was notified void* hw_event_; //!< HW event ID associated with SW event Event* notify_event_; //!< Notify event, which should contain HW signal + const Device* device_; //!< Device, this event associated with protected: static const EventWaitList nullWaitList; @@ -205,7 +208,7 @@ class Event : public RuntimeObject { /*! \brief Notifies current command queue about execution status */ - bool notifyCmdQueue(); + bool notifyCmdQueue(bool cpu_wait = false); //! 
RTTI internal implementation virtual ObjectType objectType() const { return ObjectTypeEvent; } @@ -240,6 +243,8 @@ class Command : public Event { const Event* waitingEvent_; //!< Waiting event associated with the marker protected: + bool cpu_wait_ = false; //!< If true, then the command was issued for CPU/GPU sync + //! The Events that need to complete before this command is submitted. EventWaitList eventWaitList_; @@ -262,7 +267,10 @@ class Command : public Event { eventWaitList_(nullWaitList), commandWaitBits_(0) {} - bool terminate() { + virtual bool terminate() { + if (IS_HIP) { + releaseResources(); + } if (Agent::shouldPostEventEvents() && type() != 0) { Agent::postEventFree(as_cl(static_cast(this))); } @@ -331,6 +339,9 @@ class Command : public Event { Command* GetBatchHead() const { return batch_head_; } const Event* waitingEvent() const { return waitingEvent_; } + + //! Check if this command(should be a marker) requires CPU wait + bool CpuWaitRequested() const { return cpu_wait_; } }; class UserEvent : public Command { @@ -636,7 +647,7 @@ class FillMemoryCommand : public OneMemoryArgCommand { class StreamOperationCommand : public OneMemoryArgCommand { private: - int64_t value_; // !< Value to Wait on or to Write. + uint64_t value_; // !< Value to Wait on or to Write. uint64_t mask_; // !< Mask to be applied on signal value for Wait operation. unsigned int flags_; // !< Flags defining the Wait condition. 
size_t offset_; // !< Offset into memory for Write @@ -647,7 +658,7 @@ class StreamOperationCommand : public OneMemoryArgCommand { public: StreamOperationCommand(HostQueue& queue, cl_command_type cmdType, - const EventWaitList& eventWaitList, Memory& memory, const int64_t value, + const EventWaitList& eventWaitList, Memory& memory, const uint64_t value, const uint64_t mask, unsigned int flags, size_t offset, size_t sizeBytes) : OneMemoryArgCommand(queue, cmdType, eventWaitList, memory), value_(value), @@ -665,7 +676,7 @@ class StreamOperationCommand : public OneMemoryArgCommand { virtual void submit(device::VirtualDevice& device) { device.submitStreamOperation(*this); } //! Returns the value - const int64_t value() const { return value_; } + const uint64_t value() const { return value_; } //! Returns the wait mask const uint64_t mask() const { return mask_; } //! Return the wait flags @@ -905,9 +916,18 @@ class NDRangeKernelCommand : public Command { //! Return the kernel NDRange. const NDRangeContainer& sizes() const { return sizes_; } + //! updates kernel NDRange. + void setSizes(const size_t* globalWorkOffset, const size_t* globalWorkSize, + const size_t* localWorkSize) { + sizes_.update(3, globalWorkOffset, globalWorkSize, localWorkSize); + } + //! Return the shared memory size uint32_t sharedMemBytes() const { return sharedMemBytes_; } + //! updates shared memory size + void setSharedMemBytes(uint32_t sharedMemBytes) { sharedMemBytes_ = sharedMemBytes; } + //! Return the cooperative groups mode bool cooperativeGroups() const { return (extraParam_ & CooperativeGroups) ? true : false; } @@ -996,12 +1016,11 @@ class Marker : public Command { public: //! Create a new Marker Marker(HostQueue& queue, bool userVisible, const EventWaitList& eventWaitList = nullWaitList, - const Event* waitingEvent = nullptr) - : Command(queue, userVisible ? 
CL_COMMAND_MARKER : 0, eventWaitList, 0, waitingEvent) {} + const Event* waitingEvent = nullptr, bool cpu_wait = false) + : Command(queue, userVisible ? CL_COMMAND_MARKER : 0, eventWaitList, 0, waitingEvent) { cpu_wait_ = cpu_wait; } //! The actual command implementation. virtual void submit(device::VirtualDevice& device) { device.submitMarker(*this); } - }; /*! \brief Maps CL objects created from external ones and syncs the contents (blocking). diff --git a/platform/command_utils.hpp b/platform/command_utils.hpp index 9a8b812f0..388f20e4e 100644 --- a/platform/command_utils.hpp +++ b/platform/command_utils.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2021-present Advanced Micro Devices, Inc. +/* Copyright (c) 2021 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/platform/commandqueue.cpp b/platform/commandqueue.cpp index 358e79df7..0e904aab8 100644 --- a/platform/commandqueue.cpp +++ b/platform/commandqueue.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2012-present Advanced Micro Devices, Inc. +/* Copyright (c) 2012 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -107,9 +107,9 @@ void HostQueue::finish() { Command* command = nullptr; if (IS_HIP) { command = getLastQueuedCommand(true); - if (nullptr != command) { - command->awaitCompletion(); - command->release(); + // Check if the queue has nothing to process and return + if (AMD_DIRECT_DISPATCH && command == nullptr) { + return; } } if (nullptr == command) { @@ -120,8 +120,21 @@ void HostQueue::finish() { } ClPrint(LOG_DEBUG, LOG_CMD, "marker is queued"); command->enqueue(); + } + // Check HW status of the ROCclr event.
Note: not all ROCclr modes support HW status + static constexpr bool kWaitCompletion = true; + if (!device().IsHwEventReady(command->event(), kWaitCompletion)) { + ClPrint(LOG_DEBUG, LOG_CMD, "HW Event not ready, awaiting completion instead"); command->awaitCompletion(); - command->release(); + } + command->release(); + if (IS_HIP) { + ScopedLock sl(vdev()->execution()); + ScopedLock l(lastCmdLock_); + if (lastEnqueueCommand_ != nullptr) { + lastEnqueueCommand_->release(); + lastEnqueueCommand_ = nullptr; + } } ClPrint(LOG_DEBUG, LOG_CMD, "All commands finished"); } @@ -208,23 +221,21 @@ void HostQueue::append(Command& command) { return; } - if (command.waitingEvent() == nullptr) { - // Set last submitted command - Command* prevLastEnqueueCommand; - command.retain(); - { - // lastCmdLock_ ensures that lastEnqueueCommand() can retain the command before it is swapped - // out. We want to keep this critical section as short as possible, so the command should be - // released outside this section. - ScopedLock l(lastCmdLock_); - - prevLastEnqueueCommand = lastEnqueueCommand_; - lastEnqueueCommand_ = &command; - } + // Set last submitted command + Command* prevLastEnqueueCommand; + command.retain(); + { + // lastCmdLock_ ensures that lastEnqueueCommand() can retain the command before it is swapped + // out. We want to keep this critical section as short as possible, so the command should be + // released outside this section. 
+ ScopedLock l(lastCmdLock_); - if (prevLastEnqueueCommand != nullptr) { - prevLastEnqueueCommand->release(); - } + prevLastEnqueueCommand = lastEnqueueCommand_; + lastEnqueueCommand_ = &command; + } + + if (prevLastEnqueueCommand != nullptr) { + prevLastEnqueueCommand->release(); } } @@ -238,7 +249,6 @@ Command* HostQueue::getLastQueuedCommand(bool retain) { // The batch update must be lock protected to avoid a race condition // when multiple threads submit/flush/update the batch at the same time ScopedLock sl(vdev()->execution()); - // Since the lastCmdLock_ is acquired, it is safe to read and retain the lastEnqueueCommand. // It is guaranteed that the pointer will not change. if (retain && lastEnqueueCommand_ != nullptr) { diff --git a/platform/commandqueue.hpp b/platform/commandqueue.hpp index a0e013c27..e61a45b94 100644 --- a/platform/commandqueue.hpp +++ b/platform/commandqueue.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2012-present Advanced Micro Devices, Inc. +/* Copyright (c) 2012 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/platform/context.cpp b/platform/context.cpp index 01d7206d6..2901f7c9b 100644 --- a/platform/context.cpp +++ b/platform/context.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/platform/context.hpp b/platform/context.hpp index e8bcacb1e..904d92e0a 100644 --- a/platform/context.hpp +++ b/platform/context.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/platform/counter.hpp b/platform/counter.hpp index c12cc6610..425d1314f 100644 --- a/platform/counter.hpp +++ b/platform/counter.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/platform/interop.hpp b/platform/interop.hpp index aac1e3cbc..4080f98b1 100644 --- a/platform/interop.hpp +++ b/platform/interop.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/platform/kernel.cpp b/platform/kernel.cpp index 55d2bd1b8..a716f2d7d 100644 --- a/platform/kernel.cpp +++ b/platform/kernel.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/platform/kernel.hpp b/platform/kernel.hpp index fdc58e14f..305eab180 100644 --- a/platform/kernel.hpp +++ b/platform/kernel.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/platform/memory.cpp b/platform/memory.cpp index b1b3ff122..0ebe458f8 100644 --- a/platform/memory.cpp +++ b/platform/memory.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2010-present Advanced Micro Devices, Inc. +/* Copyright (c) 2010 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -301,7 +301,7 @@ bool Memory::create(void* initFrom, bool sysMemAlloc, bool skipAlloc, bool force LogPrintfError("Can't allocate memory size - 0x%08X bytes!", getSize()); return false; } - if (isInterop()) { + if (amd::IS_HIP && isInterop()) { // Interop resources dont' have svm allocations, we use device address for mapping. amd::MemObjMap::AddMemObj( reinterpret_cast(static_cast(mem->virtualAddress())), this); @@ -415,7 +415,7 @@ Memory::~Memory() { if (NULL != deviceMemories_) { // Destroy all device memory objects for (uint i = 0; i < numDevices_; ++i) { - if (isInterop() && deviceMemories_[i].value_ != nullptr) { + if (amd::IS_HIP && isInterop() && deviceMemories_[i].value_ != nullptr) { amd::MemObjMap::RemoveMemObj(reinterpret_cast( static_cast(deviceMemories_[i].value_->virtualAddress()))); } diff --git a/platform/memory.hpp b/platform/memory.hpp index 4943d6a7d..1135fad02 100644 --- a/platform/memory.hpp +++ b/platform/memory.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2010-present Advanced Micro Devices, Inc. +/* Copyright (c) 2010 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -38,7 +38,8 @@ #include #include #define CL_MEM_FOLLOW_USER_NUMA_POLICY (1u << 31) -#define ROCCLR_MEM_HSA_SIGNAL_MEMORY (1u << 30) +#define ROCCLR_MEM_HSA_SIGNAL_MEMORY (1u << 30) +#define ROCCLR_MEM_INTERNAL_MEMORY (1u << 29) namespace device { class Memory; @@ -650,7 +651,8 @@ class LiquidFlashFile : public RuntimeObject { class ArenaMemory: public Buffer { public: ArenaMemory(Context& context) - : Buffer(context, 0, std::numeric_limits::max(), kArenaMemoryPtr) {} + : Buffer(context, 0, std::numeric_limits::max(), + reinterpret_cast(kArenaMemoryPtr)) {} }; } // namespace amd diff --git a/platform/ndrange.cpp b/platform/ndrange.cpp index 92c97a59b..162d1249e 100644 --- a/platform/ndrange.cpp +++ b/platform/ndrange.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/platform/ndrange.hpp b/platform/ndrange.hpp index 0f48f4641..cb6f926ce 100644 --- a/platform/ndrange.hpp +++ b/platform/ndrange.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -136,6 +136,16 @@ class NDRangeContainer : public HeapObject { } } + //! updates nd-range container + void update(size_t dimensions, const size_t* globalWorkOffset, const size_t* globalWorkSize, + const size_t* localWorkSize) { + for (size_t i = 0; i < dimensions; ++i) { + offset_[i] = globalWorkOffset != NULL ? 
globalWorkOffset[i] : 0; + global_[i] = globalWorkSize[i]; + local_[i] = localWorkSize[i]; + } + } + //! Return the number of dimensions. size_t dimensions() const { return dimensions_; } diff --git a/platform/object.hpp b/platform/object.hpp index f55768383..9fca95ab2 100644 --- a/platform/object.hpp +++ b/platform/object.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/platform/perfctr.hpp b/platform/perfctr.hpp index fc32e76c2..c1c9545f4 100644 --- a/platform/perfctr.hpp +++ b/platform/perfctr.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/platform/prof_protocol.h b/platform/prof_protocol.h index 0ab1f0fe5..ae23fa5ab 100644 --- a/platform/prof_protocol.h +++ b/platform/prof_protocol.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2018-present Advanced Micro Devices, Inc. +/* Copyright (c) 2018 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/platform/program.cpp b/platform/program.cpp index 071e5beee..98900e8a1 100644 --- a/platform/program.cpp +++ b/platform/program.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/platform/program.hpp b/platform/program.hpp index ba6b88fac..f32d403dd 100644 --- a/platform/program.hpp +++ b/platform/program.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/platform/runtime.cpp b/platform/runtime.cpp index fa7345f4f..63dfa37de 100644 --- a/platform/runtime.cpp +++ b/platform/runtime.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/platform/runtime.hpp b/platform/runtime.hpp index 3c5a765ce..60ab44e1d 100644 --- a/platform/runtime.hpp +++ b/platform/runtime.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/platform/sampler.hpp b/platform/sampler.hpp index 1cb969334..cd8b35f57 100644 --- a/platform/sampler.hpp +++ b/platform/sampler.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/platform/threadtrace.hpp b/platform/threadtrace.hpp index b4af590f9..fbfbbb904 100644 --- a/platform/threadtrace.hpp +++ b/platform/threadtrace.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/thread/monitor.cpp b/thread/monitor.cpp index 198873241..d7d4e2300 100644 --- a/thread/monitor.cpp +++ b/thread/monitor.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/thread/monitor.hpp b/thread/monitor.hpp index 717d1ffc3..9b5f73d4f 100644 --- a/thread/monitor.hpp +++ b/thread/monitor.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/thread/semaphore.cpp b/thread/semaphore.cpp index 795d8f5a0..78f8addb0 100644 --- a/thread/semaphore.cpp +++ b/thread/semaphore.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/thread/semaphore.hpp b/thread/semaphore.hpp index 155439044..f2fa81e1f 100644 --- a/thread/semaphore.hpp +++ b/thread/semaphore.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/thread/thread.cpp b/thread/thread.cpp index feb257659..c7a6c35b9 100644 --- a/thread/thread.cpp +++ b/thread/thread.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/thread/thread.hpp b/thread/thread.hpp index 790bbfb35..ad88f7ffa 100644 --- a/thread/thread.hpp +++ b/thread/thread.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/utils/concurrent.hpp b/utils/concurrent.hpp index 681486440..203212dbb 100644 --- a/utils/concurrent.hpp +++ b/utils/concurrent.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2010-present Advanced Micro Devices, Inc. +/* Copyright (c) 2010 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/utils/debug.cpp b/utils/debug.cpp index 21c329341..6a43a0d94 100644 --- a/utils/debug.cpp +++ b/utils/debug.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/utils/debug.hpp b/utils/debug.hpp index d23a2a988..055aa1c4d 100644 --- a/utils/debug.hpp +++ b/utils/debug.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/utils/flags.cpp b/utils/flags.cpp index 237006258..bb4361742 100644 --- a/utils/flags.cpp +++ b/utils/flags.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/utils/flags.hpp b/utils/flags.hpp index 50c0b23f5..9c826e1cc 100644 --- a/utils/flags.hpp +++ b/utils/flags.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2009-present Advanced Micro Devices, Inc. +/* Copyright (c) 2009 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -233,8 +233,8 @@ release(uint, HIP_HIDDEN_FREE_MEM, 0, \ "0 = Disable") \ release(size_t, GPU_FORCE_BLIT_COPY_SIZE, 0, \ "Size in KB of the threshold below which to force blit instead for sdma") \ -release(bool, ROC_ACTIVE_WAIT, false, \ - "Forces unconditional active wait for GPU") \ +release(uint, ROC_ACTIVE_WAIT_TIMEOUT, 50, \ + "Forces active wait of GPU interrup for the timeout(us)") \ release(bool, ROC_ENABLE_LARGE_BAR, true, \ "Enable Large Bar if supported by the device") \ release(bool, ROC_CPU_WAIT_FOR_SIGNAL, true, \ diff --git a/utils/macros.hpp b/utils/macros.hpp index b7d9f7c8b..02fef7599 100644 --- a/utils/macros.hpp +++ b/utils/macros.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2011-present Advanced Micro Devices, Inc. +/* Copyright (c) 2011 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/utils/util.hpp b/utils/util.hpp index 0c4753937..1e69ea431 100644 --- a/utils/util.hpp +++ b/utils/util.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/utils/versions.hpp b/utils/versions.hpp index 58266fc95..433c6015f 100644 --- a/utils/versions.hpp +++ b/utils/versions.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2010-present Advanced Micro Devices, Inc. +/* Copyright (c) 2010 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -28,7 +28,7 @@ #endif // AMD_PLATFORM_NAME #ifndef AMD_PLATFORM_BUILD_NUMBER -#define AMD_PLATFORM_BUILD_NUMBER 3315 +#define AMD_PLATFORM_BUILD_NUMBER 3349 #endif // AMD_PLATFORM_BUILD_NUMBER #ifndef AMD_PLATFORM_REVISION_NUMBER