From c37034974ac1f2529a7a73ba728ed7b58074d05d Mon Sep 17 00:00:00 2001 From: Vladislav Sytchenko Date: Tue, 11 May 2021 13:56:51 -0400 Subject: [PATCH 001/102] SWDEV-273235 - Initial support for Windows CMake This change refactors the current ROCclr cmake build to accommodate a more modular approach. This allows easier support for multiple compiler and/or multiple runtime backends. Currently supported compilers: HSAIL - enabled by ROCCLR_ENABLE_HSAIL (defaults to OFF) LC - enabled by ROCCLR_ENABLE_LC (defaults to ON) Currently supported runtimes: HSA - enabled by ROCCLR_ENABLE_HSA (defaults to ON) PAL - enabled by ROCCLR_ENABLE_PAL (defaults to OFF) Any configuration is supported as long as at least one compiler and one runtime is enabled. Since ROCclr clients can configure it differently, one cannot reuse the same ROCclr build artifacts between different clients. To ensure this, this patch assumes that ROCclr will be built as part of the client's project. Change-Id: Id4a5c43634296802b8ae87d1ad5984968391ccaf --- CMakeLists.txt | 316 +----------------- cmake/FindAMD_HSA_LOADER.cmake | 55 +++ cmake/FindAMD_OPENCL.cmake | 74 ++++ cmake/FindAMD_PAL.cmake | 68 ++++ .../FindROCR.cmake => FindAMD_UGL.cmake} | 38 ++- cmake/ROCclr.cmake | 132 ++++++++ cmake/ROCclrHSA.cmake | 51 +++ ...OCclrConfig.cmake.in => ROCclrHSAIL.cmake} | 10 +- .../FindROCT.cmake => ROCclrLC.cmake} | 28 +- cmake/ROCclrPAL.cmake | 82 +++++ cmake/modules/bc2h.cmake | 59 ---- device/pal/CMakeLists.txt | 164 --------- device/rocm/CMakeLists.txt | 72 ---- 13 files changed, 505 insertions(+), 644 deletions(-) create mode 100644 cmake/FindAMD_HSA_LOADER.cmake create mode 100644 cmake/FindAMD_OPENCL.cmake create mode 100644 cmake/FindAMD_PAL.cmake rename cmake/{modules/FindROCR.cmake => FindAMD_UGL.cmake} (57%) create mode 100644 cmake/ROCclr.cmake create mode 100644 cmake/ROCclrHSA.cmake rename cmake/{ROCclrConfig.cmake.in => ROCclrHSAIL.cmake} (79%) rename cmake/{modules/FindROCT.cmake => ROCclrLC.cmake} 
(58%) create mode 100644 cmake/ROCclrPAL.cmake delete mode 100644 cmake/modules/bc2h.cmake delete mode 100644 device/pal/CMakeLists.txt delete mode 100644 device/rocm/CMakeLists.txt diff --git a/CMakeLists.txt b/CMakeLists.txt index 406c4f845..3150fc200 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -20,317 +20,11 @@ cmake_minimum_required(VERSION 3.5) -project(ROCclr VERSION "1.0.0" LANGUAGES C CXX) +project(ROCclr) -include(CMakePackageConfigHelpers) - -#decide whether .so is to be build or .a -set(BUILD_SHARED_LIBS ON CACHE BOOL "Build shared library (.so) or not.") - -#example command: - -#cmake -DOPENCL_DIR=/path to/opencl .. - -set(ROCCLR_CONFIG_NAME ROCclrConfig.cmake) -set(ROCCLR_TARGETS_NAME rocclr-targets.cmake) -set(ROCCLR_VERSION_NAME rocclr-config-version.cmake) -set(ROCCLR_PACKAGE_PREFIX lib/cmake/rocclr) -set(ROCCLR_PREFIX_CODE) -set(ROCCLR_TARGETS_PATH - "${CMAKE_CURRENT_BINARY_DIR}/${ROCCLR_PACKAGE_PREFIX}/${ROCCLR_TARGETS_NAME}") -set(ROCCLR_VERSION_PATH - "${CMAKE_CURRENT_BINARY_DIR}/${ROCCLR_PACKAGE_PREFIX}/${ROCCLR_VERSION_NAME}") - -# Generate the build-tree package. 
-configure_file("cmake/${ROCCLR_CONFIG_NAME}.in" - "${ROCCLR_PACKAGE_PREFIX}/${ROCCLR_CONFIG_NAME}" - @ONLY) - -write_basic_package_version_file("${ROCCLR_VERSION_PATH}" - VERSION "${ROCclr_VERSION}" - COMPATIBILITY SameMajorVersion) - -set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake" "${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules") - -# FIXME: Handling legacy custom boolean matching "yes" or "no" -if(DEFINED USE_COMGR_LIBRARY) - if(${USE_COMGR_LIBRARY} MATCHES "yes") - set(USE_COMGR_LIBRARY ON) - elseif(${USE_COMGR_LIBRARY} MATCHES "no") - set(USE_COMGR_LIBRARY OFF) - endif() -endif() - -option(USE_COMGR_LIBRARY "Use comgr library" ON) - -find_package(amd_comgr REQUIRED CONFIG - PATHS - /opt/rocm/ - PATH_SUFFIXES - cmake/amd_comgr - lib/cmake/amd_comgr -) -message(STATUS "Code Object Manager found at ${amd_comgr_DIR}.") - -find_package(hsa-runtime64 1.3 REQUIRED CONFIG - PATHS - /opt/rocm/ - PATH_SUFFIXES - cmake/hsa-runtime64 - lib/cmake/hsa-runtime64 - lib64/cmake/hsa-runtime64 -) -message(STATUS "HSA Runtime found at ${hsa-runtime64_DIR}.") - -if( NOT OPENCL_DIR ) - find_path(OPENCL_INCLUDE_DIR - NAMES OpenCL/cl.h CL/cl.h - PATH_SUFFIXES include opencl/include inc include/x86_64 include/x64 - PATHS /opt/rocm - DOC "OpenCL include header OpenCL/cl.h or CL/cl.h" - ) - - if( NOT OPENCL_INCLUDE_DIR ) - unset(OPENCL_INCLUDE_DIR CACHE) - set(OPENCL_INCLUDE_DIR "" CACHE PATH "" FORCE ) - endif() - message(STATUS "Opencl found at ${OPENCL_INCLUDE_DIR}.") -endif() - - - -set(THREADS_PREFER_PTHREAD_FLAG ON) -find_package(Threads REQUIRED) - -set(CMAKE_CXX_STANDARD 14) -set(CMAKE_CXX_STANDARD_REQUIRED ON) -set(CMAKE_CXX_EXTENSIONS OFF) -set(CMAKE_POSITION_INDEPENDENT_CODE ON) - -if((CMAKE_CXX_COMPILER_ID STREQUAL "GNU") OR - (CMAKE_CXX_COMPILER_ID MATCHES "Clang")) - add_definitions( - # Enabling -Wextra or -pedantic will cause - # thousands of warnings. Keep things simple for now. - -Wall - # Makefile build adds -fno-strict-aliasing instead. 
- -Wno-strict-aliasing - # This one seems impossible to fix for now. - # There are hundreds of instances of unused vars/functions - # throughout the code base. - -Wno-unused-variable) -endif() - -add_definitions(-D__x86_64__ -DOPENCL_MAJOR=2 -DOPENCL_MINOR=1 -DCL_TARGET_OPENCL_VERSION=220 -DATI_OS_LINUX -DATI_ARCH_X86 -DLITTLEENDIAN_CPU -DATI_BITS_64 -DWITH_TARGET_AMDGCN -DOPENCL_EXPORTS -DCL_USE_DEPRECATED_OPENCL_1_0_APIS -DCL_USE_DEPRECATED_OPENCL_1_1_APIS -DCL_USE_DEPRECATED_OPENCL_1_2_APIS -DCL_USE_DEPRECATED_OPENCL_2_0_APIS -DWITH_LIGHTNING_COMPILER) -add_definitions(-DOPENCL_C_MAJOR=2 -DOPENCL_C_MINOR=0) - -set(CMAKE_SHARED_LINKER_FLAGS "-Wl,-Bsymbolic -Wl,--unresolved-symbols=report-all") -set (CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/lib) - -if(CMAKE_BUILD_TYPE MATCHES "^Debug$") - add_definitions(-DDEBUG) -endif() - -if(DEV_LOG_ENABLE MATCHES "yes") - add_definitions(-DDEV_LOG_ENABLE) -endif() - -option(BUILD_LINUXPRO "Build LinuxPro" OFF) -if (BUILD_LINUXPRO) - message(STATUS "Building LinuxPro") - add_definitions(-DROCCLR_DISABLE_PREVEGA) - add_definitions(-DROCCLR_ENABLE_GL_SHARING) -endif() - -option(BUILD_PAL "Build PAL backend" OFF) -if (BUILD_PAL) - add_subdirectory(device/pal) - - add_subdirectory(compiler/sc/HSAIL) -else () - add_subdirectory(device/rocm) -endif() - -set(oclruntime_src - thread/thread.cpp - thread/monitor.cpp - thread/semaphore.cpp - utils/flags.cpp - utils/debug.cpp - device/appprofile.cpp - device/device.cpp - device/hwdebug.cpp - device/blitcl.cpp - device/blit.cpp - device/devkernel.cpp - device/devwavelimiter.cpp - device/devprogram.cpp - device/devhcprintf.cpp - device/devhcmessages.cpp - device/devhostcall.cpp - device/comgrctx.cpp - device/hsailctx.cpp - platform/activity.cpp - platform/kernel.cpp - platform/context.cpp - platform/command.cpp - platform/ndrange.cpp - platform/runtime.cpp - platform/memory.cpp - platform/program.cpp - platform/commandqueue.cpp - platform/agent.cpp - os/os_win32.cpp - 
os/alloc.cpp - os/os.cpp - os/os_posix.cpp - compiler/lib/utils/options.cpp - elf/elf.cpp -) - -add_library(amdrocclr_static STATIC ${oclruntime_src}) - -set_target_properties(amdrocclr_static PROPERTIES POSITION_INDEPENDENT_CODE ON) - -target_include_directories(amdrocclr_static - PUBLIC - $ - $ - $ - $ - $ - $ - $ - $ - $ - $ - # GL and EGL headers. - $ - $ - $) - -if(USE_COMGR_LIBRARY) - # FIXME: This should not be part of the public interface. Downstream - # users need to add these definitions. This should be defined in a - # config header here so other builds don't need to be aware of this. - if(${BUILD_SHARED_LIBS}) - target_compile_definitions(amdrocclr_static PUBLIC USE_COMGR_LIBRARY COMGR_DYN_DLL) - else() - target_compile_definitions(amdrocclr_static PUBLIC USE_COMGR_LIBRARY) - endif() - #Needed here to export as transitive dependency in rocclr-targets.cmake - target_link_libraries(amdrocclr_static PRIVATE amd_comgr) +if (${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}) + message(AUTHOR_WARNING "ROCclr is being built as a standalone project. This isn't supported anymore.") endif() -target_link_libraries(amdrocclr_static PUBLIC Threads::Threads) -target_link_libraries(amdrocclr_static PUBLIC dl) - - -if (BUILD_PAL) - target_sources(amdrocclr_static PRIVATE $) - target_link_libraries(amdrocclr_static PRIVATE amdhsaloader) - target_link_libraries(amdrocclr_static PRIVATE pal) - - target_compile_definitions(amdrocclr_static PRIVATE WITH_PAL_DEVICE) - - export(TARGETS amdrocclr_static rocclrpal amdhsaloader amdhsacode pal addrlib vam metrohash cwpack gpuopen ddCore tiny_printf mpack rapidjson - FILE ${ROCCLR_TARGETS_PATH}) -else() - target_compile_definitions(amdrocclr_static PRIVATE WITH_HSA_DEVICE) - - #ROCclr being static lib shall not link hsa-runtime64. 
- #Needed here to export as transitive dependency in rocclr-targets.cmake - target_link_libraries(amdrocclr_static PRIVATE hsa-runtime64::hsa-runtime64) - target_sources(amdrocclr_static PRIVATE $) - export(TARGETS amdrocclr_static - FILE ${ROCCLR_TARGETS_PATH}) -endif() - -if (UNIX) - set(ROCclr_DEFAULT_INSTALL_PREFIX "/opt/rocm/rocclr") - - find_library(LIBRT rt) - if (LIBRT) - target_link_libraries(amdrocclr_static PUBLIC ${LIBRT}) - endif() - - if (LIBNUMA) - target_link_libraries(amdrocclr_static PUBLIC ${LIBNUMA}) - endif() -endif() -#comment out as it's not available in cmake 3.5 -#if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) -if(NOT CMAKE_INSTALL_PREFIX) - if(CMAKE_BUILD_TYPE MATCHES Debug) - set(CMAKE_INSTALL_PREFIX ${CMAKE_CURRENT_SOURCE_DIR} CACHE PATH "Installation path for rocclr" FORCE) - #elseif(CMAKE_BUILD_TYPE MATCHES Release) - else() - set(CMAKE_INSTALL_PREFIX ${ROCclr_DEFAULT_INSTALL_PREFIX} CACHE PATH "Installation path for rocclr" FORCE) - # message(FATAL_ERROR "Invalid CMAKE_BUILD_TYPE specified. 
Valid values are Debug and Release") - endif() -#endif() -endif() - - -install( TARGETS amdrocclr_static - EXPORT rocclr - ARCHIVE DESTINATION lib - LIBRARY DESTINATION lib - COMPONENT applications) - -install ( - DIRECTORY include/ - DESTINATION include) -install ( - DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/elf" - DESTINATION include - FILES_MATCHING PATTERN "*.h*") -install ( - DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/compiler" - DESTINATION include - FILES_MATCHING PATTERN "*.h*") -install ( - DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/utils" - DESTINATION include - FILES_MATCHING PATTERN "*.h*") -install ( - DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/platform" - DESTINATION include - FILES_MATCHING PATTERN "*.h*") -install ( - DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/thread" - DESTINATION include - FILES_MATCHING PATTERN "*.h*") -install ( - DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/os" - DESTINATION include - FILES_MATCHING PATTERN "*.h*") -install ( - DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/device" - DESTINATION include - FILES_MATCHING PATTERN "*.h*") - -# Generate the install-tree package. -set(ROCCLR_PREFIX_CODE " -# Derive absolute install prefix from config file path. 
-get_filename_component(ROCCLR_PREFIX \"\${CMAKE_CURRENT_LIST_FILE}\" PATH)") -string(REGEX REPLACE "/" ";" count "${ROCCLR_PACKAGE_PREFIX}") -foreach(p ${count}) - set(ROCCLR_PREFIX_CODE "${ROCCLR_PREFIX_CODE} -get_filename_component(ROCCLR_PREFIX \"\${ROCCLR_PREFIX}\" PATH)") -endforeach() - -configure_file("cmake/${ROCCLR_CONFIG_NAME}.in" - "${CMAKE_CURRENT_BINARY_DIR}/${ROCCLR_CONFIG_NAME}.install" - @ONLY) - -install(FILES - "${CMAKE_CURRENT_BINARY_DIR}/${ROCCLR_CONFIG_NAME}.install" - DESTINATION "${ROCCLR_PACKAGE_PREFIX}" - RENAME "${ROCCLR_CONFIG_NAME}") -install(EXPORT rocclr - DESTINATION "${ROCCLR_PACKAGE_PREFIX}" - FILE "${ROCCLR_TARGETS_NAME}") -install(FILES - "${ROCCLR_VERSION_PATH}" - DESTINATION "${ROCCLR_PACKAGE_PREFIX}") +list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake) +include(ROCclr) diff --git a/cmake/FindAMD_HSA_LOADER.cmake b/cmake/FindAMD_HSA_LOADER.cmake new file mode 100644 index 000000000..35f682457 --- /dev/null +++ b/cmake/FindAMD_HSA_LOADER.cmake @@ -0,0 +1,55 @@ +# Copyright (c) 2020-2021 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+if(AMD_HSA_LOADER_FOUND)  # already located earlier in this configure run; nothing to do
+  return()
+endif()
+
+find_path(AMD_LIBELF_INCLUDE_DIR libelf.h  # AMD_LIBELF_PATH is tried first, then sibling checkouts
+  HINTS
+    ${AMD_LIBELF_PATH}
+  PATHS
+    ${CMAKE_SOURCE_DIR}/hsail-compiler/lib/loaders/elf/utils/libelf
+    ${CMAKE_SOURCE_DIR}/../hsail-compiler/lib/loaders/elf/utils/libelf
+    ${CMAKE_SOURCE_DIR}/../../hsail-compiler/lib/loaders/elf/utils/libelf)
+
+find_path(AMD_HSAIL_INCLUDE_DIR hsa.h  # AMD_SC_PATH is tried first, then sibling "sc" checkouts
+  HINTS
+    ${AMD_SC_PATH}
+  PATHS
+    ${CMAKE_SOURCE_DIR}/sc
+    ${CMAKE_SOURCE_DIR}/../sc
+    ${CMAKE_SOURCE_DIR}/../../sc
+  PATH_SUFFIXES
+    HSAIL/include)
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(AMD_HSA_LOADER
+  "\nHSA Loader not found"
+  AMD_LIBELF_INCLUDE_DIR AMD_HSAIL_INCLUDE_DIR)
+mark_as_advanced(AMD_LIBELF_INCLUDE_DIR AMD_HSAIL_INCLUDE_DIR)
+
+set(USE_AMD_LIBELF "yes" CACHE STRING "Build against the bundled AMD libelf" FORCE)  # signature is CACHE <type> <doc> [FORCE]
+# TODO compiler team requested supporting sp3 disassembly
+set(NO_SI_SP3 "yes" CACHE STRING "Disable SI sp3 disassembly support" FORCE)  # signature is CACHE <type> <doc> [FORCE]
+set(HSAIL_COMPILER_SOURCE_DIR "${AMD_LIBELF_INCLUDE_DIR}/../../../../..")
+add_subdirectory("${AMD_LIBELF_INCLUDE_DIR}" ${CMAKE_CURRENT_BINARY_DIR}/libelf)  # NOTE(review): also reached when the find_path calls above failed and the caller did not pass REQUIRED
+add_subdirectory("${AMD_HSAIL_INCLUDE_DIR}/../ext/libamdhsacode" ${CMAKE_CURRENT_BINARY_DIR}/libamdhsacode)
+add_subdirectory("${AMD_HSAIL_INCLUDE_DIR}/../ext/loader" ${CMAKE_CURRENT_BINARY_DIR}/loader)
diff --git a/cmake/FindAMD_OPENCL.cmake b/cmake/FindAMD_OPENCL.cmake
new file mode 100644
index 000000000..58b464575
--- /dev/null
+++ b/cmake/FindAMD_OPENCL.cmake
@@ -0,0 +1,74 @@
+# Copyright (c) 2020-2021 Advanced Micro Devices, Inc. All rights reserved.
+# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
+ +if(AMD_OPENCL_FOUND) + return() +endif() + +find_path(AMD_OPENCL_INCLUDE_DIR cl.h + HINTS + ${AMD_OPENCL_PATH} + PATHS + # gerrit repo name + ${CMAKE_SOURCE_DIR}/opencl + ${CMAKE_SOURCE_DIR}/../opencl + ${CMAKE_SOURCE_DIR}/../../opencl + # github repo name + ${CMAKE_SOURCE_DIR}/ROCm-OpenCL-Runtime + ${CMAKE_SOURCE_DIR}/../ROCm-OpenCL-Runtime + ${CMAKE_SOURCE_DIR}/../../ROCm-OpenCL-Runtime + # jenkins repo name + ${CMAKE_SOURCE_DIR}/opencl-on-vdi + ${CMAKE_SOURCE_DIR}/../opencl-on-vdi + ${CMAKE_SOURCE_DIR}/../../opencl-on-vdi + ${CMAKE_SOURCE_DIR}/opencl-on-rocclr + ${CMAKE_SOURCE_DIR}/../opencl-on-rocclr + ${CMAKE_SOURCE_DIR}/../../opencl-on-rocclr + PATH_SUFFIXES + khronos/headers/opencl2.2/CL + NO_DEFAULT_PATH) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(AMD_OPENCL + "\nAMD OpenCL not found" + AMD_OPENCL_INCLUDE_DIR) +mark_as_advanced(AMD_OPENCL_INCLUDE_DIR) + +set(AMD_OPENCL_DEFS + -DHAVE_CL2_HPP + -DOPENCL_MAJOR=2 + -DOPENCL_MINOR=2 + -DOPENCL_C_MAJOR=2 + -DOPENCL_C_MINOR=0 + -DCL_TARGET_OPENCL_VERSION=220 + -DCL_USE_DEPRECATED_OPENCL_1_0_APIS + -DCL_USE_DEPRECATED_OPENCL_1_1_APIS + -DCL_USE_DEPRECATED_OPENCL_1_2_APIS + -DCL_USE_DEPRECATED_OPENCL_2_0_APIS) +mark_as_advanced(AMD_OPENCL_DEFS) + +set(AMD_OPENCL_INCLUDE_DIRS + ${AMD_OPENCL_INCLUDE_DIR} + ${AMD_OPENCL_INCLUDE_DIR}/.. + ${AMD_OPENCL_INCLUDE_DIR}/../.. + ${AMD_OPENCL_INCLUDE_DIR}/../../.. + ${AMD_OPENCL_INCLUDE_DIR}/../../../.. + ${AMD_OPENCL_INCLUDE_DIR}/../../../../amdocl) +mark_as_advanced(AMD_OPENCL_INCLUDE_DIRS) diff --git a/cmake/FindAMD_PAL.cmake b/cmake/FindAMD_PAL.cmake new file mode 100644 index 000000000..ef19c0557 --- /dev/null +++ b/cmake/FindAMD_PAL.cmake @@ -0,0 +1,68 @@ +# Copyright (c) 2020-2021 Advanced Micro Devices, Inc. All rights reserved. 
+# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
+ +if(AMD_PAL_FOUND) + return() +endif() + +find_path(AMD_ASIC_REG_INCLUDE_DIR nv_id.h + HINTS + ${AMD_DRIVERS_PATH} + PATHS + # p4 repo layout + ${CMAKE_SOURCE_DIR}/drivers + ${CMAKE_SOURCE_DIR}/../drivers + ${CMAKE_SOURCE_DIR}/../../drivers + # github ent repo layout + ${CMAKE_SOURCE_DIR}/drivers/drivers + ${CMAKE_SOURCE_DIR}/../drivers/drivers + ${CMAKE_SOURCE_DIR}/../../drivers/drivers + PATH_SUFFIXES + inc/asic_reg) + +find_path(AMD_HSAIL_INCLUDE_DIR hsa.h + HINTS + ${AMD_SC_PATH} + PATHS + ${CMAKE_SOURCE_DIR}/sc + ${CMAKE_SOURCE_DIR}/../sc + ${CMAKE_SOURCE_DIR}/../../sc + PATH_SUFFIXES + HSAIL/include) + +find_path(AMD_PAL_INCLUDE_DIR pal.h + HINTS + ${AMD_PAL_PATH} + PATHS + ${CMAKE_SOURCE_DIR}/pal + ${CMAKE_SOURCE_DIR}/../pal + ${CMAKE_SOURCE_DIR}/../../pal + PATH_SUFFIXES + inc/core) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(AMD_PAL + "\nPAL not found" + AMD_ASIC_REG_INCLUDE_DIR AMD_HSAIL_INCLUDE_DIR AMD_PAL_INCLUDE_DIR) +mark_as_advanced(AMD_ASIC_REG_INCLUDE_DIR AMD_HSAIL_INCLUDE_DIR AMD_PAL_INCLUDE_DIR) + +set(GLOBAL_ROOT_SRC_DIR "${AMD_ASIC_REG_INCLUDE_DIR}/../../..") +set(PAL_SC_PATH "${AMD_HSAIL_INCLUDE_DIR}/../..") +add_subdirectory("${AMD_PAL_INCLUDE_DIR}/../.." ${CMAKE_CURRENT_BINARY_DIR}/pal) diff --git a/cmake/modules/FindROCR.cmake b/cmake/FindAMD_UGL.cmake similarity index 57% rename from cmake/modules/FindROCR.cmake rename to cmake/FindAMD_UGL.cmake index 13862f3c1..b730de6f6 100644 --- a/cmake/modules/FindROCR.cmake +++ b/cmake/FindAMD_UGL.cmake @@ -1,4 +1,4 @@ -# Copyright (C) 2017-2021 Advanced Micro Devices, Inc. All Rights Reserved. +# Copyright (c) 2020-2021 Advanced Micro Devices, Inc. All rights reserved. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -18,19 +18,29 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. 
-# Try to find ROCR (Radeon Open Compute Runtime) -# -# Once found, this will define: -# - ROCR_FOUND - ROCR status (found or not found) -# - ROCR_INCLUDES - Required ROCR include directories -# - ROCR_LIBRARIES - Required ROCR libraries -find_path(FIND_ROCR_INCLUDES hsa.h HINTS /opt/rocm/include /opt/rocm/hsa/include PATH_SUFFIXES hsa) -find_library(FIND_ROCR_LIBRARIES hsa-runtime64 HINTS /opt/rocm/lib /opt/rocm/hsa/lib) +if(AMD_UGL_FOUND) + return() +endif() + +find_path(AMD_UGL_INCLUDE_DIR GL/glx.h + HINTS + ${AMD_UGL_PATH} + PATHS + # p4 repo layout + ${CMAKE_SOURCE_DIR}/drivers/ugl/inc + ${CMAKE_SOURCE_DIR}/../drivers/ugl/inc + ${CMAKE_SOURCE_DIR}/../../drivers/ugl/inc + # github ent repo layout + ${CMAKE_SOURCE_DIR}/drivers/drivers/ugl/inc + ${CMAKE_SOURCE_DIR}/../drivers/drivers/ugl/inc + ${CMAKE_SOURCE_DIR}/../../drivers/drivers/ugl/inc + NO_DEFAULT_PATH) include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(ROCR DEFAULT_MSG - FIND_ROCR_INCLUDES FIND_ROCR_LIBRARIES) -mark_as_advanced(FIND_ROCR_INCLUDES FIND_ROCR_LIBRARIES) +find_package_handle_standard_args(AMD_UGL + "\nAMD UGL not found" + AMD_UGL_INCLUDE_DIR) +mark_as_advanced(AMD_UGL_INCLUDE_DIR) -set(ROCR_INCLUDES ${FIND_ROCR_INCLUDES}) -set(ROCR_LIBRARIES ${FIND_ROCR_LIBRARIES}) +set(AMD_UGL_INCLUDE_DIRS ${AMD_UGL_INCLUDE_DIR} ${ROCCLR_SRC_DIR}/device/gpu/gslbe/src/rt) +mark_as_advanced(AMD_UGL_INCLUDE_DIRS) diff --git a/cmake/ROCclr.cmake b/cmake/ROCclr.cmake new file mode 100644 index 000000000..41903515d --- /dev/null +++ b/cmake/ROCclr.cmake @@ -0,0 +1,132 @@ +# Copyright (c) 2020-2021 Advanced Micro Devices, Inc. All rights reserved. 
+# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +cmake_minimum_required(VERSION 3.5) + +# ROCclr abstracts the usage of multiple AMD compilers and runtimes. +# It is possible to support multiple backends concurrently in the same binary. 
+option(ROCCLR_ENABLE_HSAIL "Enable support for HSAIL compiler" OFF)
+option(ROCCLR_ENABLE_LC "Enable support for LC compiler" ON)
+option(ROCCLR_ENABLE_HSA "Enable support for HSA runtime" ON)
+option(ROCCLR_ENABLE_PAL "Enable support for PAL runtime" OFF)
+
+if((NOT ROCCLR_ENABLE_HSAIL) AND (NOT ROCCLR_ENABLE_LC))
+  message(FATAL_ERROR "Support for at least one compiler needs to be enabled!")  # FATAL_ERROR is the mode that aborts configuration
+endif()
+
+if((NOT ROCCLR_ENABLE_HSA) AND (NOT ROCCLR_ENABLE_PAL))
+  message(FATAL_ERROR "Support for at least one runtime needs to be enabled!")  # FATAL_ERROR is the mode that aborts configuration
+endif()
+
+set(THREADS_PREFER_PTHREAD_FLAG ON)
+find_package(Threads REQUIRED)
+
+find_package(AMD_OPENCL)  # NOTE(review): not REQUIRED, yet AMD_OPENCL_DEFS / AMD_OPENCL_INCLUDE_DIRS are consumed below - confirm a miss is acceptable
+
+add_library(rocclr STATIC)  # sources are attached afterwards with target_sources(), here and in the per-backend modules
+
+set(ROCCLR_SRC_DIR "${CMAKE_CURRENT_LIST_DIR}/..")  # repository root, derived from this module's own location
+mark_as_advanced(ROCCLR_SRC_DIR)
+
+set_target_properties(rocclr PROPERTIES
+  CXX_STANDARD 14
+  CXX_STANDARD_REQUIRED ON
+  CXX_EXTENSIONS OFF
+  POSITION_INDEPENDENT_CODE ON)
+
+target_sources(rocclr PRIVATE
+  ${ROCCLR_SRC_DIR}/compiler/lib/utils/options.cpp
+  ${ROCCLR_SRC_DIR}/device/appprofile.cpp
+  ${ROCCLR_SRC_DIR}/device/blit.cpp
+  ${ROCCLR_SRC_DIR}/device/blitcl.cpp
+  ${ROCCLR_SRC_DIR}/device/comgrctx.cpp
+  ${ROCCLR_SRC_DIR}/device/devhcmessages.cpp
+  ${ROCCLR_SRC_DIR}/device/devhcprintf.cpp
+  ${ROCCLR_SRC_DIR}/device/devhostcall.cpp
+  ${ROCCLR_SRC_DIR}/device/device.cpp
+  ${ROCCLR_SRC_DIR}/device/devkernel.cpp
+  ${ROCCLR_SRC_DIR}/device/devprogram.cpp
+  ${ROCCLR_SRC_DIR}/device/devwavelimiter.cpp
+  ${ROCCLR_SRC_DIR}/device/hsailctx.cpp
+  ${ROCCLR_SRC_DIR}/device/hwdebug.cpp
+  ${ROCCLR_SRC_DIR}/elf/elf.cpp
+  ${ROCCLR_SRC_DIR}/os/alloc.cpp
+  ${ROCCLR_SRC_DIR}/os/os_posix.cpp
+  ${ROCCLR_SRC_DIR}/os/os_win32.cpp
+  ${ROCCLR_SRC_DIR}/os/os.cpp
+  ${ROCCLR_SRC_DIR}/platform/activity.cpp
+  ${ROCCLR_SRC_DIR}/platform/agent.cpp
+  ${ROCCLR_SRC_DIR}/platform/command.cpp
+  ${ROCCLR_SRC_DIR}/platform/commandqueue.cpp
+  ${ROCCLR_SRC_DIR}/platform/context.cpp
+  ${ROCCLR_SRC_DIR}/platform/kernel.cpp
+  ${ROCCLR_SRC_DIR}/platform/memory.cpp
+  ${ROCCLR_SRC_DIR}/platform/ndrange.cpp
+  ${ROCCLR_SRC_DIR}/platform/program.cpp
+  ${ROCCLR_SRC_DIR}/platform/runtime.cpp
+  ${ROCCLR_SRC_DIR}/thread/monitor.cpp
+  ${ROCCLR_SRC_DIR}/thread/semaphore.cpp
+  ${ROCCLR_SRC_DIR}/thread/thread.cpp
+  ${ROCCLR_SRC_DIR}/utils/debug.cpp
+  ${ROCCLR_SRC_DIR}/utils/flags.cpp)
+
+if(WIN32)
+  target_compile_definitions(rocclr PUBLIC ATI_OS_WIN)
+else()
+  target_compile_definitions(rocclr PUBLIC ATI_OS_LINUX)
+endif()
+
+target_compile_definitions(rocclr PUBLIC
+  ATI_ARCH_X86
+  LITTLEENDIAN_CPU
+  WITH_LIQUID_FLASH=0
+  ${AMD_OPENCL_DEFS})
+
+target_include_directories(rocclr PUBLIC
+  ${ROCCLR_SRC_DIR}
+  ${ROCCLR_SRC_DIR}/compiler/lib
+  ${ROCCLR_SRC_DIR}/compiler/lib/include
+  ${ROCCLR_SRC_DIR}/compiler/lib/backends/common
+  ${ROCCLR_SRC_DIR}/device
+  ${ROCCLR_SRC_DIR}/elf
+  ${ROCCLR_SRC_DIR}/include
+  ${AMD_OPENCL_INCLUDE_DIRS})
+
+target_link_libraries(rocclr PUBLIC Threads::Threads)
+# IPC on Windows is not supported
+if(UNIX)
+  target_link_libraries(rocclr PUBLIC rt)
+endif()
+
+if(ROCCLR_ENABLE_HSAIL)  # each enabled backend module adds its own sources/definitions to the rocclr target
+  include(ROCclrHSAIL)
+endif()
+
+if(ROCCLR_ENABLE_LC)
+  include(ROCclrLC)
+endif()
+
+if(ROCCLR_ENABLE_HSA)
+  include(ROCclrHSA)
+endif()
+
+if(ROCCLR_ENABLE_PAL)
+  include(ROCclrPAL)
+endif()
diff --git a/cmake/ROCclrHSA.cmake b/cmake/ROCclrHSA.cmake
new file mode 100644
index 000000000..87cb55e4c
--- /dev/null
+++ b/cmake/ROCclrHSA.cmake
@@ -0,0 +1,51 @@
+# Copyright (c) 2020-2021 Advanced Micro Devices, Inc. All rights reserved.
+# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
+ +find_package(hsa-runtime64 1.3 REQUIRED CONFIG + PATHS + /opt/rocm/ + PATH_SUFFIXES + cmake/hsa-runtime64 + lib/cmake/hsa-runtime64 + lib64/cmake/hsa-runtime64) +target_link_libraries(rocclr PUBLIC hsa-runtime64::hsa-runtime64) + +find_package(NUMA QUIET) +if(NUMA_FOUND) + target_compile_definitions(rocclr PUBLIC ROCCLR_SUPPORT_NUMA_POLICY) + target_link_libraries(rocclr PUBLIC NUMA) +endif() + +target_sources(rocclr PRIVATE + ${ROCCLR_SRC_DIR}/device/rocm/rocappprofile.cpp + ${ROCCLR_SRC_DIR}/device/rocm/rocblit.cpp + ${ROCCLR_SRC_DIR}/device/rocm/roccounters.cpp + ${ROCCLR_SRC_DIR}/device/rocm/rocdevice.cpp + ${ROCCLR_SRC_DIR}/device/rocm/rocglinterop.cpp + ${ROCCLR_SRC_DIR}/device/rocm/rockernel.cpp + ${ROCCLR_SRC_DIR}/device/rocm/rocmemory.cpp + ${ROCCLR_SRC_DIR}/device/rocm/rocprintf.cpp + ${ROCCLR_SRC_DIR}/device/rocm/rocprogram.cpp + ${ROCCLR_SRC_DIR}/device/rocm/rocschedcl.cpp + ${ROCCLR_SRC_DIR}/device/rocm/rocsettings.cpp + ${ROCCLR_SRC_DIR}/device/rocm/rocsignal.cpp + ${ROCCLR_SRC_DIR}/device/rocm/rocvirtual.cpp) + +target_compile_definitions(rocclr PUBLIC WITH_HSA_DEVICE) diff --git a/cmake/ROCclrConfig.cmake.in b/cmake/ROCclrHSAIL.cmake similarity index 79% rename from cmake/ROCclrConfig.cmake.in rename to cmake/ROCclrHSAIL.cmake index 17ef4427b..82b80ee27 100644 --- a/cmake/ROCclrConfig.cmake.in +++ b/cmake/ROCclrHSAIL.cmake @@ -1,4 +1,4 @@ -# Copyright (C) 2017-2021 Advanced Micro Devices, Inc. All Rights Reserved. +# Copyright (c) 2020-2021 Advanced Micro Devices, Inc. All rights reserved. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -18,10 +18,4 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. 
-@ROCCLR_PREFIX_CODE@ - -include( CMakeFindDependencyMacro ) -find_dependency(hsa-runtime64) -find_dependency(amd_comgr) -message(STATUS "ROCclr at ${ROCclr_DIR}") -include("${ROCclr_DIR}/@ROCCLR_TARGETS_NAME@") \ No newline at end of file +target_compile_definitions(rocclr PUBLIC WITH_COMPILER_LIB HSAIL_DYN_DLL) diff --git a/cmake/modules/FindROCT.cmake b/cmake/ROCclrLC.cmake similarity index 58% rename from cmake/modules/FindROCT.cmake rename to cmake/ROCclrLC.cmake index 6ad3f59e1..669b96ab7 100644 --- a/cmake/modules/FindROCT.cmake +++ b/cmake/ROCclrLC.cmake @@ -1,4 +1,4 @@ -# Copyright (C) 2019-2021 Advanced Micro Devices, Inc. All Rights Reserved. +# Copyright (c) 2020-2021 Advanced Micro Devices, Inc. All rights reserved. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -18,19 +18,15 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. 
-# Try to find ROCT (Radeon Open Compute Thunk) -# -# Once found, this will define: -# - ROCT_FOUND - ROCT status (found or not found) -# - ROCT_INCLUDES - Required ROCT include directories -# - ROCT_LIBRARIES - Required ROCT libraries -find_path(FIND_ROCT_INCLUDES hsakmt.h HINTS /opt/rocm/include) -find_library(FIND_ROCT_LIBRARIES hsakmt HINTS /opt/rocm/lib) - -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(ROCT DEFAULT_MSG - FIND_ROCT_INCLUDES FIND_ROCT_LIBRARIES) -mark_as_advanced(FIND_ROCT_INCLUDES FIND_ROCT_LIBRARIES) +find_package(amd_comgr REQUIRED CONFIG + PATHS + /opt/rocm/ + PATH_SUFFIXES + cmake/amd_comgr + lib/cmake/amd_comgr) -set(ROCT_INCLUDES ${FIND_ROCT_INCLUDES}) -set(ROCT_LIBRARIES ${FIND_ROCT_LIBRARIES}) +target_compile_definitions(rocclr PUBLIC WITH_LIGHTNING_COMPILER USE_COMGR_LIBRARY) +if(BUILD_SHARED_LIBS) + target_compile_definitions(rocclr PUBLIC COMGR_DYN_DLL) +endif() +target_link_libraries(rocclr PUBLIC amd_comgr) diff --git a/cmake/ROCclrPAL.cmake b/cmake/ROCclrPAL.cmake new file mode 100644 index 000000000..aadf7ba00 --- /dev/null +++ b/cmake/ROCclrPAL.cmake @@ -0,0 +1,82 @@ +# Copyright (c) 2020-2021 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +file(STRINGS palcdefs PAL_MAJOR_VERSION REGEX "^PAL_MAJOR_VERSION = [0-9]+") +string(REGEX REPLACE "PAL_MAJOR_VERSION = " "" PAL_MAJOR_VERSION ${PAL_MAJOR_VERSION}) + +file(STRINGS palcdefs GPUOPEN_MAJOR_VERSION REGEX "^GPUOPEN_MAJOR_VERSION = [0-9]+") +string(REGEX REPLACE "GPUOPEN_MAJOR_VERSION = " "" GPUOPEN_MAJOR_VERSION ${GPUOPEN_MAJOR_VERSION}) + +set(PAL_CLIENT "OCL") + +set(PAL_CLIENT_INTERFACE_MAJOR_VERSION ${PAL_MAJOR_VERSION}) +set(GPUOPEN_CLIENT_INTERFACE_MAJOR_VERSION ${GPUOPEN_MAJOR_VERSION}) +set(GPUOPEN_CLIENT_INTERFACE_MINOR_VERSION 0) + +set(PAL_CLOSED_SOURCE ON) +set(PAL_DEVELOPER_BUILD OFF) +set(PAL_BUILD_GPUOPEN ON) +set(PAL_BUILD_SCPC OFF) +set(PAL_BUILD_VIDEO OFF) +set(PAL_BUILD_DTIF OFF) +set(PAL_BUILD_OSS ON) +set(PAL_BUILD_SECURITY OFF) +set(PAL_SPPAP_CLOSED_SOURCE OFF) +set(PAL_BUILD_GFX ON) +set(PAL_BUILD_NULL_DEVICE OFF) +set(PAL_BUILD_GFX6 ON) +set(PAL_BUILD_GFX9 ON) + +find_package(AMD_PAL) +find_package(AMD_HSA_LOADER) +find_package(AMD_UGL) + +target_sources(rocclr PRIVATE + ${ROCCLR_SRC_DIR}/device/pal/palappprofile.cpp + ${ROCCLR_SRC_DIR}/device/pal/palblit.cpp + ${ROCCLR_SRC_DIR}/device/pal/palconstbuf.cpp + ${ROCCLR_SRC_DIR}/device/pal/palcounters.cpp + ${ROCCLR_SRC_DIR}/device/pal/paldebugmanager.cpp + ${ROCCLR_SRC_DIR}/device/pal/paldevice.cpp + ${ROCCLR_SRC_DIR}/device/pal/paldeviced3d10.cpp + ${ROCCLR_SRC_DIR}/device/pal/paldeviced3d11.cpp + ${ROCCLR_SRC_DIR}/device/pal/paldeviced3d9.cpp + ${ROCCLR_SRC_DIR}/device/pal/paldevicegl.cpp + 
${ROCCLR_SRC_DIR}/device/pal/palgpuopen.cpp + ${ROCCLR_SRC_DIR}/device/pal/palkernel.cpp + ${ROCCLR_SRC_DIR}/device/pal/palmemory.cpp + ${ROCCLR_SRC_DIR}/device/pal/palprintf.cpp + ${ROCCLR_SRC_DIR}/device/pal/palprogram.cpp + ${ROCCLR_SRC_DIR}/device/pal/palresource.cpp + ${ROCCLR_SRC_DIR}/device/pal/palschedcl.cpp + ${ROCCLR_SRC_DIR}/device/pal/palsettings.cpp + ${ROCCLR_SRC_DIR}/device/pal/palsignal.cpp + ${ROCCLR_SRC_DIR}/device/pal/palthreadtrace.cpp + ${ROCCLR_SRC_DIR}/device/pal/paltimestamp.cpp + ${ROCCLR_SRC_DIR}/device/pal/palvirtual.cpp) + +target_compile_definitions(rocclr PUBLIC WITH_PAL_DEVICE PAL_GPUOPEN_OCL) +target_include_directories(rocclr PUBLIC ${AMD_UGL_INCLUDE_DIRS}) +target_link_libraries(rocclr PUBLIC pal amdhsaloader) + +# support for OGL/D3D interop +if(WIN32) + target_link_libraries(rocclr PUBLIC opengl32.lib dxguid.lib) +endif() diff --git a/cmake/modules/bc2h.cmake b/cmake/modules/bc2h.cmake deleted file mode 100644 index 4a7664539..000000000 --- a/cmake/modules/bc2h.cmake +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (C) 2017-2021 Advanced Micro Devices, Inc. All Rights Reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -# THE SOFTWARE. - -file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/bc2h.c -"#include \n" -"int main(int argc, char **argv){\n" -" FILE *ifp, *ofp;\n" -" int c, i, l;\n" -" if (argc != 4) return 1;\n" -" ifp = fopen(argv[1], \"rb\");\n" -" if (!ifp) return 1;\n" -" i = fseek(ifp, 0, SEEK_END);\n" -" if (i < 0) return 1;\n" -" l = ftell(ifp);\n" -" if (l < 0) return 1;\n" -" i = fseek(ifp, 0, SEEK_SET);\n" -" if (i < 0) return 1;\n" -" ofp = fopen(argv[2], \"wb+\");\n" -" if (!ofp) return 1;\n" -" fprintf(ofp, \"#define %s_size %d\\n\\n\"\n" -" \"#if defined __GNUC__\\n\"\n" -" \"__attribute__((aligned (4096)))\\n\"\n" -" \"#elif defined _MSC_VER\\n\"\n" -" \"__declspec(align(4096))\\n\"\n" -" \"#endif\\n\"\n" -" \"static const unsigned char %s[%s_size+1] = {\",\n" -" argv[3], l,\n" -" argv[3], argv[3]);\n" -" i = 0;\n" -" while ((c = getc(ifp)) != EOF) {\n" -" if (0 == (i&7)) fprintf(ofp, \"\\n \");\n" -" fprintf(ofp, \" 0x%02x,\", c);\n" -" ++i;\n" -" }\n" -" fprintf(ofp, \" 0x00\\n};\\n\\n\");\n" -" fclose(ifp);\n" -" fclose(ofp);\n" -" return 0;\n" -"}\n" -) - -add_executable(bc2h ${CMAKE_CURRENT_BINARY_DIR}/bc2h.c) diff --git a/device/pal/CMakeLists.txt b/device/pal/CMakeLists.txt deleted file mode 100644 index 121da3070..000000000 --- a/device/pal/CMakeLists.txt +++ /dev/null @@ -1,164 +0,0 @@ -# Copyright (C) 2020-2021 Advanced Micro Devices, Inc. All Rights Reserved. 
-# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -# THE SOFTWARE. 
- -### -file(STRINGS palcdefs PAL_MAJOR_VERSION REGEX "^PAL_MAJOR_VERSION = [0-9]+") -string(REGEX REPLACE "PAL_MAJOR_VERSION = " "" PAL_MAJOR_VERSION ${PAL_MAJOR_VERSION}) - -file(STRINGS palcdefs GPUOPEN_MAJOR_VERSION REGEX "^GPUOPEN_MAJOR_VERSION = [0-9]+") -string(REGEX REPLACE "GPUOPEN_MAJOR_VERSION = " "" GPUOPEN_MAJOR_VERSION ${GPUOPEN_MAJOR_VERSION}) - -### Create PAL backend library for ROCclr -add_library(rocclrpal OBJECT "") - -# Add defines necessary for PAL build -set(PAL_CLIENT_INTERFACE_MAJOR_VERSION ${PAL_MAJOR_VERSION} - CACHE STRING "PAL major interface: ${PAL_MAJOR_VERSION}" FORCE) -set(GPUOPEN_CLIENT_INTERFACE_MAJOR_VERSION ${GPUOPEN_MAJOR_VERSION} - CACHE STRING "GPU open major interface: ${GPUOPEN_MAJOR_VERSION}" FORCE) -set(GPUOPEN_CLIENT_INTERFACE_MINOR_VERSION "0" CACHE STRING "GPU open minor interface: 0" FORCE) - -set(PAL_CLIENT "OCL" CACHE STRING "Build PAL OCL support" FORCE) - -set(PAL_BUILD_GPUOPEN ON CACHE BOOL "Build GPUOpen support") -set(PAL_BUILD_VIDEO OFF CACHE BOOL "Don't build PAL with Video support") -set(PAL_BUILD_SCPC OFF CACHE BOOL "Don't build SCPC") - -set(PAL_CLOSED_SOURCE ON CACHE BOOL "Build closed source PAL") - -set(PAL_BUILD_POLARIS22 ON CACHE BOOL "Build PAL with Polaris support") -set(PAL_BUILD_GFX9 ON CACHE BOOL "Build PAL with GFX9 support") -set(PAL_BUILD_OSS4 ON CACHE BOOL "Build PAL with OSS4 support") -set(PAL_BUILD_VEGA12 ON CACHE BOOL "Build PAL with VEGA12 support") -set(PAL_BUILD_VEGA20 ON CACHE BOOL "Build PAL with GFX9 support") -set(PAL_BUILD_RAVEN1 ON CACHE BOOL "Build PAL with Raven1 support") -set(PAL_BUILD_RAVEN2 ON CACHE BOOL "Build PAL with Raven2 support") -set(PAL_BUILD_RENOIR ON CACHE BOOL "Build PAL with Renoir support") - -set(PAL_BUILD_GFX10 ON CACHE BOOL "Build PAL with GFX10 support") -set(PAL_BUILD_NAVI12 ON CACHE BOOL "Build PAL with Navi12 support") -set(PAL_BUILD_NAVI14 ON CACHE BOOL "Build PAL with Navi14 support") - -option(PAL_DEVELOPER_BUILD "No developer build" OFF) - 
-set(PYTHON_EXECUTABLE /usr/bin/python3) - -# Build PAL . . . -set(GLOBAL_ROOT_SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}) -add_subdirectory(palbe) - -# Setup defines for the client compilation -target_compile_definitions(rocclrpal PRIVATE - PAL_CLIENT_INTERFACE_MAJOR_VERSION=${PAL_CLIENT_INTERFACE_MAJOR_VERSION}) -target_compile_definitions(rocclrpal PRIVATE - GPUOPEN_CLIENT_INTERFACE_MAJOR_VERSION=${GPUOPEN_CLIENT_INTERFACE_MAJOR_VERSION}) -target_compile_definitions(rocclrpal PRIVATE - GPUOPEN_CLIENT_INTERFACE_MINOR_VERSION=${GPUOPEN_CLIENT_INTERFACE_MINOR_VERSION}) - -target_compile_definitions(rocclrpal PRIVATE PAL_CLIENT_OCL=1) -target_compile_definitions(rocclrpal PRIVATE PAL_GPUOPEN_OCL=1) -target_compile_definitions(rocclrpal PRIVATE PAL_BUILD_GPUOPEN=1) -target_compile_definitions(rocclrpal PRIVATE PAL_BUILD_SCPC=0) -target_compile_definitions(rocclrpal PRIVATE PAL_CLOSED_SOURCE=1) -target_compile_definitions(rocclrpal PRIVATE PAL_BUILD_GPUOPEN=1) - -# GFX9 targets -target_compile_definitions(rocclrpal PRIVATE PAL_BUILD_POLARIS22=1) -target_compile_definitions(rocclrpal PRIVATE PAL_BUILD_GFX9=1) -target_compile_definitions(rocclrpal PRIVATE PAL_BUILD_OSS4=1) -target_compile_definitions(rocclrpal PRIVATE PAL_BUILD_VEGA12=1) -target_compile_definitions(rocclrpal PRIVATE PAL_BUILD_VEGA20=1) -target_compile_definitions(rocclrpal PRIVATE PAL_BUILD_RAVEN1=1) -target_compile_definitions(rocclrpal PRIVATE PAL_BUILD_RAVEN2=1) -target_compile_definitions(rocclrpal PRIVATE PAL_BUILD_RENOIR=1) - -# GFX10 targets -target_compile_definitions(rocclrpal PRIVATE PAL_BUILD_GFX10=1) -target_compile_definitions(rocclrpal PRIVATE PAL_BUILD_NAVI12=1) -target_compile_definitions(rocclrpal PRIVATE PAL_BUILD_NAVI14=1) - -target_compile_definitions(rocclrpal PRIVATE OPENCL_MAINLINE=1) - -if(${USE_COMGR_LIBRARY} MATCHES "yes") - target_compile_definitions(rocclrpal PRIVATE USE_COMGR_LIBRARY) -endif() - -if(UNIX) - target_compile_definitions(rocclrpal PRIVATE PAL_AMDGPU_BUILD) -endif() - 
-target_include_directories(rocclrpal PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/compiler/sc/HSAIL/include) -target_include_directories(rocclrpal PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../asic_reg) - -target_include_directories(rocclrpal PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/device/pal/palbe) -target_include_directories(rocclrpal PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/device/pal/palbe/inc) -target_include_directories(rocclrpal PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/device/pal/palbe/inc/core) -target_include_directories(rocclrpal PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/device/pal/palbe/inc/util) -target_include_directories(rocclrpal PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/device/pal/palbe/shared/gpuopen/inc) -target_include_directories(rocclrpal PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/device/pal/palbe/shared/gpuopen/third_party/metrohash/src) -target_include_directories(rocclrpal PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) - -target_include_directories(rocclrpal - PUBLIC - $ - $ - $ - # GL and EGL headers - $ - $ - $ - PRIVATE - ${OPENCL_DIR} - ${PROJECT_SOURCE_DIR}/compiler/lib - ${PROJECT_SOURCE_DIR}/compiler/lib/include - ${PROJECT_SOURCE_DIR}/compiler/lib/backends/common - ${CMAKE_CURRENT_BINARY_DIR} - ${ROCM_OCL_INCLUDES} - ${ROCR_INCLUDES}) - - -target_sources(rocclrpal PRIVATE - palappprofile.cpp - palblit.cpp - palconstbuf.cpp - palcounters.cpp - paldebugmanager.cpp - paldevice.cpp - paldeviced3d10.cpp - paldeviced3d11.cpp - paldeviced3d9.cpp - paldevicegl.cpp - palgpuopen.cpp - palkernel.cpp - palmemory.cpp - palprintf.cpp - palprogram.cpp - palresource.cpp - palschedcl.cpp - palsettings.cpp - palsignal.cpp - palthreadtrace.cpp - paltimestamp.cpp - palvirtual.cpp -) - -set_target_properties(rocclrpal PROPERTIES POSITION_INDEPENDENT_CODE ON) - - diff --git a/device/rocm/CMakeLists.txt b/device/rocm/CMakeLists.txt deleted file mode 100644 index 7d6a69904..000000000 --- a/device/rocm/CMakeLists.txt +++ /dev/null @@ -1,72 +0,0 @@ -# Copyright (C) 2017-2021 Advanced Micro Devices, Inc. 
All Rights Reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -# THE SOFTWARE. 
- -add_library(oclrocm OBJECT - roccounters.cpp - rocprintf.cpp - rocprogram.cpp - rocmemory.cpp - rocdevice.cpp - rocblit.cpp - rockernel.cpp - rocvirtual.cpp - rocglinterop.cpp - rocappprofile.cpp - rocsettings.cpp - rocschedcl.cpp - rocsignal.cpp -) - -target_include_directories(oclrocm - PUBLIC - $ - $ - $ - # GL and EGL headers - $ - $ - $ - PRIVATE - ${OPENCL_DIR} - ${PROJECT_SOURCE_DIR}/compiler/lib - ${PROJECT_SOURCE_DIR}/compiler/lib/backends/common - ${PROJECT_SOURCE_DIR}/elf - ${CMAKE_CURRENT_BINARY_DIR} - ${ROCM_OCL_INCLUDES} - $) - - -if(USE_COMGR_LIBRARY) - if(${BUILD_SHARED_LIBS}) - target_compile_definitions(oclrocm PRIVATE USE_COMGR_LIBRARY COMGR_DYN_DLL) - else() - target_compile_definitions(oclrocm PRIVATE USE_COMGR_LIBRARY) - endif() -endif() - -if (UNIX) - find_library(LIBNUMA numa) - if (LIBNUMA) - target_compile_definitions(oclrocm PRIVATE ROCCLR_SUPPORT_NUMA_POLICY) - message(STATUS "Found: ${LIBNUMA}") - endif() -endif() - -set_target_properties(oclrocm PROPERTIES POSITION_INDEPENDENT_CODE ON) From d53a119d2ea8fc4febb4beb071c6f33de34480f3 Mon Sep 17 00:00:00 2001 From: German Andryeyev Date: Thu, 27 May 2021 16:48:03 -0400 Subject: [PATCH 002/102] SWDEV-287137 - Add blocking signal logic With HIP API callback runtime has to stall the queue until the callback is done. Rocclr will introduce SW blocking HSA signal, which will be released after the callback is done. 
Change-Id: I6411f3efab31b468e3b87ebb5c8d155e116b613d --- device/rocm/rocvirtual.cpp | 66 +++++++++++++++++++++++++++----------- device/rocm/rocvirtual.hpp | 17 ++++++++-- 2 files changed, 62 insertions(+), 21 deletions(-) diff --git a/device/rocm/rocvirtual.cpp b/device/rocm/rocvirtual.cpp index 8e0ffa2bc..e8e21a5c7 100644 --- a/device/rocm/rocvirtual.cpp +++ b/device/rocm/rocvirtual.cpp @@ -115,7 +115,9 @@ void Timestamp::checkGpuTime() { uint64_t end = 0; for (auto it : signals_) { - if (hsa_signal_load_relaxed(it->signal_) > 0) { + // Ignore the wait if runtime processes API callback, because the signal value is bigger + // than expected and the value reset will occur after API callback is done + if (GetCallbackSignal().handle == 0) { WaitForSignal(it->signal_); } // Avoid profiling data for the sync barrier, in tiny performance tests the first call @@ -153,7 +155,7 @@ bool HsaAmdSignalHandler(hsa_signal_value_t value, void* arg) { ((thread = new amd::HostThread()) != nullptr && thread == amd::Thread::current()))) { return false; } - amd::ScopedLock sl(ts->gpu()->execution()); + if (ts->gpu()->isProfilerAttached()) { amd::Command* head = ts->getParsedCommand(); if (head == nullptr) { @@ -185,9 +187,17 @@ bool HsaAmdSignalHandler(hsa_signal_value_t value, void* arg) { ClPrint(amd::LOG_INFO, amd::LOG_SIG, "Handler: value(%d), timestamp(%p), handle(0x%lx)", static_cast(value), arg, ts->HwProfiling() ? ts->Signals()[0]->signal_.handle : 0); + // Save callback signal + hsa_signal_t callback_signal = ts->GetCallbackSignal(); + // Update the batch, since signal is complete ts->gpu()->updateCommandsState(ts->command().GetBatchHead()); + // Reset API callback signal. 
It will release AQL queue and start commands processing + if (callback_signal.handle != 0) { + hsa_signal_subtract_relaxed(callback_signal, 1); + } + // Return false, so the callback will not be called again for this signal return false; } @@ -371,8 +381,17 @@ hsa_signal_t VirtualGPU::HwQueueTracker::ActiveSignal( // If direct dispatch is enabled and the batch head isn't null, then it's a marker and // requires the batch update upon HSA signal completion if (AMD_DIRECT_DISPATCH && (ts->command().GetBatchHead() != nullptr)) { + uint32_t init_value = kInitSignalValueOne; + // If API callback is enabled, then use a blocking signal for AQL queue. + // HSA signal will be acquired in SW and released after HSA signal callback + if (ts->command().Callback() != nullptr) { + ts->SetCallbackSignal(prof_signal->signal_); + // Blocks AQL queue from further processing + hsa_signal_add_relaxed(prof_signal->signal_, 1); + init_value += 1; + } hsa_status_t result = hsa_amd_signal_async_handler(prof_signal->signal_, - HSA_SIGNAL_CONDITION_LT, kInitSignalValueOne, &HsaAmdSignalHandler, ts); + HSA_SIGNAL_CONDITION_LT, init_value, &HsaAmdSignalHandler, ts); if (HSA_STATUS_SUCCESS != result) { LogError("hsa_amd_signal_async_handler() failed to set the handler!"); } else { @@ -396,7 +415,7 @@ std::vector& VirtualGPU::HwQueueTracker::WaitingSignal(HwQueueEngi // Does runtime switch the active engine? if (engine != engine_) { - // Yes, return the signla from the previous operation for a wait + // Yes, return the signal from the previous operation for a wait engine_ = engine; explicit_wait = true; } else { @@ -404,8 +423,8 @@ std::vector& VirtualGPU::HwQueueTracker::WaitingSignal(HwQueueEngi if (engine == HwQueueEngine::Unknown) { explicit_wait = true; } else { - // Check if skip wait optimizaiton is enabled. It will try to predice the same engine in ROCr - // and ignore signal wait, relying on in-order engine execution + // Check if skip wait optimization is enabled. 
It will try to predict the same engine in ROCr + // and ignore the signal wait, relying on in-order engine execution const Settings& settings = gpu_.dev().settings(); if (!settings.skip_copy_sync_ && (engine != HwQueueEngine::Compute)) { explicit_wait = true; @@ -414,24 +433,33 @@ std::vector& VirtualGPU::HwQueueTracker::WaitingSignal(HwQueueEngi } // Check if a wait is required if (explicit_wait) { - ProfilingSignal** prof_signal; + bool skip_internal_signal = false; + + for (uint32_t i = 0; i < external_signals_.size(); ++i) { + // If external signal matches internal one, then skip it + if (external_signals_[i]->signal_.handle == + signal_list_[current_id_]->signal_.handle) { + skip_internal_signal = true; + } + } // Add the oldest signal into the tracking for a wait - external_signals_.push_back(signal_list_[current_id_]); - prof_signal = &external_signals_[0]; + if (!skip_internal_signal) { + external_signals_.push_back(signal_list_[current_id_]); + } // Validate all signals for the wait and skip already completed for (uint32_t i = 0; i < external_signals_.size(); ++i) { // Early signal status check - if (hsa_signal_load_relaxed(prof_signal[i]->signal_) > 0) { + if (hsa_signal_load_relaxed(external_signals_[i]->signal_) > 0) { const Settings& settings = gpu_.dev().settings(); // Actively wait on CPU for 750 us to avoid extra overheads of signal tracking on GPU - if (!WaitForSignal(prof_signal[i]->signal_)) { + if (!WaitForSignal(external_signals_[i]->signal_)) { if (settings.cpu_wait_for_signal_) { // Wait on CPU for completion if requested - CpuWaitForSignal(prof_signal[i]); + CpuWaitForSignal(external_signals_[i]); } else { // Add HSA signal for tracking on GPU - waiting_signals_.push_back(prof_signal[i]->signal_); + waiting_signals_.push_back(external_signals_[i]->signal_); } } } @@ -891,12 +919,6 @@ void VirtualGPU::dispatchBarrierPacket(uint16_t packetHeader, bool skipSignal) { __atomic_store_n(reinterpret_cast(aql_loc), packetHeader, __ATOMIC_RELEASE); 
hsa_signal_store_screlease(gpu_queue_->doorbell_signal, index); - // Clear dependent signals for the next packet - barrier_packet_.dep_signal[0] = hsa_signal_t{}; - barrier_packet_.dep_signal[1] = hsa_signal_t{}; - barrier_packet_.dep_signal[2] = hsa_signal_t{}; - barrier_packet_.dep_signal[3] = hsa_signal_t{}; - barrier_packet_.dep_signal[4] = hsa_signal_t{}; ClPrint(amd::LOG_DEBUG, amd::LOG_AQL, "[%zx] HWq=0x%zx, BarrierAND Header = 0x%x (type=%d, barrier=%d, acquire=%d," " release=%d), " @@ -913,6 +935,12 @@ void VirtualGPU::dispatchBarrierPacket(uint16_t packetHeader, bool skipSignal) { barrier_packet_.dep_signal[0], barrier_packet_.dep_signal[1], barrier_packet_.dep_signal[2], barrier_packet_.dep_signal[3], barrier_packet_.dep_signal[4], barrier_packet_.completion_signal); + // Clear dependent signals for the next packet + barrier_packet_.dep_signal[0] = hsa_signal_t{}; + barrier_packet_.dep_signal[1] = hsa_signal_t{}; + barrier_packet_.dep_signal[2] = hsa_signal_t{}; + barrier_packet_.dep_signal[3] = hsa_signal_t{}; + barrier_packet_.dep_signal[4] = hsa_signal_t{}; } // ================================================================================================ diff --git a/device/rocm/rocvirtual.hpp b/device/rocm/rocvirtual.hpp index 85987083b..f39d901bf 100644 --- a/device/rocm/rocvirtual.hpp +++ b/device/rocm/rocvirtual.hpp @@ -90,7 +90,11 @@ class Timestamp : public amd::HeapObject { VirtualGPU* gpu_; //!< Virtual GPU, associated with this timestamp const amd::Command& command_; //!< Command, associated with this timestamp amd::Command* parsedCommand_; //!< Command down the list, considering command_ as head - std::vector signals_; + std::vector signals_; //!< The list of all signals, associated with the TS + hsa_signal_t callback_signal_; //!< Signal associated with a callback for possible later update + + Timestamp(const Timestamp&) = delete; + Timestamp& operator=(const Timestamp&) = delete; public: Timestamp(VirtualGPU* gpu, const amd::Command& 
command) @@ -98,7 +102,8 @@ class Timestamp : public amd::HeapObject { , end_(0) , gpu_(gpu) , command_(command) - , parsedCommand_(nullptr) {} + , parsedCommand_(nullptr) + , callback_signal_(hsa_signal_t{}) {} ~Timestamp() {} @@ -141,6 +146,14 @@ class Timestamp : public amd::HeapObject { //! Returns virtual GPU device, used with this timestamp VirtualGPU* gpu() const { return gpu_; } + + //! Updates the callback signal + void SetCallbackSignal(hsa_signal_t callback_signal) { + callback_signal_ = callback_signal; + } + + //! Returns the callback signal + hsa_signal_t GetCallbackSignal() const { return callback_signal_; } }; class VirtualGPU : public device::VirtualDevice { From 48e96721ed7d6167b3cfb277eedb13edfec15b59 Mon Sep 17 00:00:00 2001 From: Chauncey Hui Date: Fri, 28 May 2021 03:00:06 -0400 Subject: [PATCH 003/102] SWDEV-2 - Change OpenCL version number from 3315 to 3316 --- utils/versions.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/versions.hpp b/utils/versions.hpp index 58266fc95..4416732e3 100644 --- a/utils/versions.hpp +++ b/utils/versions.hpp @@ -28,7 +28,7 @@ #endif // AMD_PLATFORM_NAME #ifndef AMD_PLATFORM_BUILD_NUMBER -#define AMD_PLATFORM_BUILD_NUMBER 3315 +#define AMD_PLATFORM_BUILD_NUMBER 3316 #endif // AMD_PLATFORM_BUILD_NUMBER #ifndef AMD_PLATFORM_REVISION_NUMBER From 520374bd75eff25c9a936172a18cadc1c241c414 Mon Sep 17 00:00:00 2001 From: Vladislav Sytchenko Date: Thu, 27 May 2021 16:26:35 -0400 Subject: [PATCH 004/102] SWDEV-273235 - Fix Windows CMake build Change-Id: I337b8d3b38a492b77b55602ab3a6bb3c05e693e0 --- cmake/ROCclrPAL.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/ROCclrPAL.cmake b/cmake/ROCclrPAL.cmake index aadf7ba00..5b4e8f623 100644 --- a/cmake/ROCclrPAL.cmake +++ b/cmake/ROCclrPAL.cmake @@ -18,10 +18,10 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. 
-file(STRINGS palcdefs PAL_MAJOR_VERSION REGEX "^PAL_MAJOR_VERSION = [0-9]+") +file(STRINGS ${ROCCLR_SRC_DIR}/device/pal/palcdefs PAL_MAJOR_VERSION REGEX "^PAL_MAJOR_VERSION = [0-9]+") string(REGEX REPLACE "PAL_MAJOR_VERSION = " "" PAL_MAJOR_VERSION ${PAL_MAJOR_VERSION}) -file(STRINGS palcdefs GPUOPEN_MAJOR_VERSION REGEX "^GPUOPEN_MAJOR_VERSION = [0-9]+") +file(STRINGS ${ROCCLR_SRC_DIR}/device/pal/palcdefs GPUOPEN_MAJOR_VERSION REGEX "^GPUOPEN_MAJOR_VERSION = [0-9]+") string(REGEX REPLACE "GPUOPEN_MAJOR_VERSION = " "" GPUOPEN_MAJOR_VERSION ${GPUOPEN_MAJOR_VERSION}) set(PAL_CLIENT "OCL") From 25c80183f72aabd28aaff59e111f17982f4e4351 Mon Sep 17 00:00:00 2001 From: Chauncey Hui Date: Sat, 29 May 2021 03:00:06 -0400 Subject: [PATCH 005/102] SWDEV-2 - Change OpenCL version number from 3316 to 3317 --- utils/versions.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/versions.hpp b/utils/versions.hpp index 4416732e3..d60b27dfb 100644 --- a/utils/versions.hpp +++ b/utils/versions.hpp @@ -28,7 +28,7 @@ #endif // AMD_PLATFORM_NAME #ifndef AMD_PLATFORM_BUILD_NUMBER -#define AMD_PLATFORM_BUILD_NUMBER 3316 +#define AMD_PLATFORM_BUILD_NUMBER 3317 #endif // AMD_PLATFORM_BUILD_NUMBER #ifndef AMD_PLATFORM_REVISION_NUMBER From 54a73abfc73f3318288bbe8aa6ea1a67cf7a88ce Mon Sep 17 00:00:00 2001 From: German Andryeyev Date: Mon, 31 May 2021 12:32:29 -0400 Subject: [PATCH 006/102] SWDEV-240804 - Add coarse grain memory support Add an extension to memory advise to disable cache coherency for better performance Change-Id: I283703d81d9c36ddfa2c8fffa15eef60e2195056 --- device/device.hpp | 9 +++++---- device/rocm/rocdevice.cpp | 6 ++++++ 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/device/device.hpp b/device/device.hpp index a41dbbee1..991cfc4bc 100644 --- a/device/device.hpp +++ b/device/device.hpp @@ -97,10 +97,11 @@ enum MemoryAdvice : uint32_t { UnsetReadMostly = 2, ///< Undo the effect of hipMemAdviseSetReadMostly SetPreferredLocation = 3, ///< 
Set the preferred location for the data as the specified device UnsetPreferredLocation = 4, ///< Clear the preferred location for the data - SetAccessedBy = 5, ///< Data will be accessed by the specified device, - ///< so prevent page faults as much as possible - UnsetAccessedBy = 6 ///< Let the Unified Memory subsystem decide on - ///< the page faulting policy for the specified device + SetAccessedBy = 5, ///< Data will be accessed by the specified device, reducing + ///< the amount of page faults + UnsetAccessedBy = 6, ///< HMM decides on the page faulting policy for the specified device + SetCoarseGrain = 100, ///< Change cache policy to improve performance (disables coherency) + UnsetCoarseGrain = 101 ///< Restore coherent cache policy at the cost of some performance }; enum MemRangeAttribute : uint32_t { diff --git a/device/rocm/rocdevice.cpp b/device/rocm/rocdevice.cpp index 6fe6dd203..9765687be 100644 --- a/device/rocm/rocdevice.cpp +++ b/device/rocm/rocdevice.cpp @@ -2270,6 +2270,12 @@ bool Device::SetSvmAttributesInt(const void* dev_ptr, size_t count, // @note: 0 may cause a failure on old runtimes attr.push_back({HSA_AMD_SVM_ATTRIB_AGENT_ACCESSIBLE_IN_PLACE, 0}); break; + case amd::MemoryAdvice::SetCoarseGrain: + attr.push_back({HSA_AMD_SVM_ATTRIB_GLOBAL_FLAG, HSA_AMD_SVM_GLOBAL_FLAG_COARSE_GRAINED}); + break; + case amd::MemoryAdvice::UnsetCoarseGrain: + attr.push_back({HSA_AMD_SVM_ATTRIB_GLOBAL_FLAG, HSA_AMD_SVM_GLOBAL_FLAG_FINE_GRAINED}); + break; default: return false; break; From a61fe2552c914390e398e49eaf1b5821f8fb5489 Mon Sep 17 00:00:00 2001 From: cjatin Date: Wed, 2 Jun 2021 16:48:41 +0530 Subject: [PATCH 007/102] SWDEV-283267 - Fix a bug where rocclr appends compiler options twice Change-Id: I54ca6e8458cf6414c263df7a8bf61f7ce39a64df --- device/devprogram.cpp | 8 -------- 1 file changed, 8 deletions(-) diff --git a/device/devprogram.cpp b/device/devprogram.cpp index 4ada87fbd..b0e8292a5 100644 --- a/device/devprogram.cpp +++ 
b/device/devprogram.cpp @@ -713,14 +713,6 @@ bool Program::compileImplLC(const std::string& sourceCode, buildLog_ += "Warning: opening the file to dump the OpenCL source failed.\n"; } } - // Append Options provided by user to driver options - if (isHIP()) { - if (options->origOptionStr.size()) { - std::istringstream userOptions{options->origOptionStr}; - std::copy(std::istream_iterator(userOptions), - std::istream_iterator(), std::back_inserter(driverOptions)); - } - } // Append Options provided by user to driver options if (isHIP()) { From a1de4a0dab03056a66fa5f623837b74d638176c1 Mon Sep 17 00:00:00 2001 From: Chauncey Hui Date: Thu, 3 Jun 2021 03:00:05 -0400 Subject: [PATCH 008/102] SWDEV-2 - Change OpenCL version number from 3317 to 3318 --- utils/versions.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/versions.hpp b/utils/versions.hpp index d60b27dfb..7663d9cc6 100644 --- a/utils/versions.hpp +++ b/utils/versions.hpp @@ -28,7 +28,7 @@ #endif // AMD_PLATFORM_NAME #ifndef AMD_PLATFORM_BUILD_NUMBER -#define AMD_PLATFORM_BUILD_NUMBER 3317 +#define AMD_PLATFORM_BUILD_NUMBER 3318 #endif // AMD_PLATFORM_BUILD_NUMBER #ifndef AMD_PLATFORM_REVISION_NUMBER From ab6de447bdae8576e3457a91750b155c053af24f Mon Sep 17 00:00:00 2001 From: Vladislav Sytchenko Date: Wed, 2 Jun 2021 17:11:16 -0400 Subject: [PATCH 009/102] SWDEV-273235 - Find UGL headers based on AMD_DRIVERS_PATH All KMD/asic_reg/UGL headers are located under the drivers folder. No need for the AMD_UGL_PATH variable as it essentially is ${AMD_DRIVERS_PATH}/ugl. 
Change-Id: I070d737d50f2096493b3e75ef9b9e824cb19d048 --- cmake/FindAMD_UGL.cmake | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/cmake/FindAMD_UGL.cmake b/cmake/FindAMD_UGL.cmake index b730de6f6..d30520434 100644 --- a/cmake/FindAMD_UGL.cmake +++ b/cmake/FindAMD_UGL.cmake @@ -24,17 +24,18 @@ endif() find_path(AMD_UGL_INCLUDE_DIR GL/glx.h HINTS - ${AMD_UGL_PATH} + ${AMD_DRIVERS_PATH} PATHS # p4 repo layout - ${CMAKE_SOURCE_DIR}/drivers/ugl/inc - ${CMAKE_SOURCE_DIR}/../drivers/ugl/inc - ${CMAKE_SOURCE_DIR}/../../drivers/ugl/inc + ${CMAKE_SOURCE_DIR}/drivers + ${CMAKE_SOURCE_DIR}/../drivers + ${CMAKE_SOURCE_DIR}/../../drivers # github ent repo layout - ${CMAKE_SOURCE_DIR}/drivers/drivers/ugl/inc - ${CMAKE_SOURCE_DIR}/../drivers/drivers/ugl/inc - ${CMAKE_SOURCE_DIR}/../../drivers/drivers/ugl/inc - NO_DEFAULT_PATH) + ${CMAKE_SOURCE_DIR}/drivers/drivers + ${CMAKE_SOURCE_DIR}/../drivers/drivers + ${CMAKE_SOURCE_DIR}/../../drivers/drivers + PATH_SUFFIXES + ugl/inc) include(FindPackageHandleStandardArgs) find_package_handle_standard_args(AMD_UGL From 75ab9f7723c233b161a8651aaf19ff90a3b90b7f Mon Sep 17 00:00:00 2001 From: German Andryeyev Date: Thu, 3 Jun 2021 10:21:39 -0400 Subject: [PATCH 010/102] SWDEV-240804 - Update ReadMostly attribute Switch HSA_AMD_SVM_ATTRIB_READ_ONLY to HSA_AMD_SVM_ATTRIB_READ_MOSTLY to match Cuda. The new attribute was just exposed in ROCr/KFD. 
Change-Id: I2ee522d33c347ba52a4e272d2cd7f67960490cf7 --- device/rocm/rocdevice.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/device/rocm/rocdevice.cpp b/device/rocm/rocdevice.cpp index 9765687be..512f8e74f 100644 --- a/device/rocm/rocdevice.cpp +++ b/device/rocm/rocdevice.cpp @@ -2231,10 +2231,10 @@ bool Device::SetSvmAttributesInt(const void* dev_ptr, size_t count, switch (advice) { case amd::MemoryAdvice::SetReadMostly: - attr.push_back({HSA_AMD_SVM_ATTRIB_READ_ONLY, true}); + attr.push_back({HSA_AMD_SVM_ATTRIB_READ_MOSTLY, true}); break; case amd::MemoryAdvice::UnsetReadMostly: - attr.push_back({HSA_AMD_SVM_ATTRIB_READ_ONLY, false}); + attr.push_back({HSA_AMD_SVM_ATTRIB_READ_MOSTLY, false}); break; case amd::MemoryAdvice::SetPreferredLocation: if (use_cpu) { From 2876b4b5d3e326123b41028bc20d63679bbee8cb Mon Sep 17 00:00:00 2001 From: kjayapra-amd Date: Fri, 28 May 2021 15:07:34 -0400 Subject: [PATCH 011/102] SWDEV-288690 - Updating the return value with roc::Device::init Change-Id: I132fa424cf9bec608e5c8429e93d20e78b76c6f0 --- device/device.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/device/device.cpp b/device/device.cpp index 82d55b44e..9e14d4969 100644 --- a/device/device.cpp +++ b/device/device.cpp @@ -390,7 +390,8 @@ bool Device::init() { // If returned false, error initializing HSA stack. // If returned true, either HSA not installed or HSA stack // successfully initialized. - if (!roc::Device::init()) { + ret = roc::Device::init(); + if (!ret) { // abort() commentted because this is the only indication // that KFD is not installed. // Ignore the failure and assume KFD is not installed. 
From 5afdc362a1fc23e1df4935c878b1274d2391a788 Mon Sep 17 00:00:00 2001 From: Chauncey Hui Date: Fri, 4 Jun 2021 03:00:06 -0400 Subject: [PATCH 012/102] SWDEV-2 - Change OpenCL version number from 3318 to 3319 --- utils/versions.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/versions.hpp b/utils/versions.hpp index 7663d9cc6..66da8d420 100644 --- a/utils/versions.hpp +++ b/utils/versions.hpp @@ -28,7 +28,7 @@ #endif // AMD_PLATFORM_NAME #ifndef AMD_PLATFORM_BUILD_NUMBER -#define AMD_PLATFORM_BUILD_NUMBER 3318 +#define AMD_PLATFORM_BUILD_NUMBER 3319 #endif // AMD_PLATFORM_BUILD_NUMBER #ifndef AMD_PLATFORM_REVISION_NUMBER From ba99d85bfe3774547030b70bd45b79c209c9ee29 Mon Sep 17 00:00:00 2001 From: Saleel Kudchadker Date: Wed, 2 Jun 2021 09:03:17 -0700 Subject: [PATCH 013/102] SWDEV-286092 - Enable handler for marker always For DD, send a NOP packet so that we leverage the handler to indicate completion. Change-Id: Ie57ea0124a8497d39cc49da1c4575c2cd86b9319 --- platform/command.cpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/platform/command.cpp b/platform/command.cpp index b8dfb244a..7fccab9e6 100644 --- a/platform/command.cpp +++ b/platform/command.cpp @@ -328,14 +328,19 @@ void Command::enqueue() { // when multiple threads submit/flush/update the batch at the same time ScopedLock sl(queue_->vdev()->execution()); queue_->FormSubmissionBatch(this); - if ((type() == CL_COMMAND_MARKER || type() == 0) && !profilingInfo().marker_ts_) { + if ((type() == CL_COMMAND_MARKER || type() == 0)) { // The current HSA signal tracking logic requires profiling enabled for the markers EnableProfiling(); // Update batch head for the current marker. 
Hence the status of all commands can be // updated upon the marker completion SetBatchHead(queue_->GetSubmittionBatch()); - // Flush the current batch, but skip the wait on CPU if possible to avoid a stall - queue_->vdev()->flush(queue_->GetSubmittionBatch()); + if (profilingInfo().marker_ts_) { + setStatus(CL_SUBMITTED); + submit(*queue_->vdev()); + } else { + // Flush the current batch, but skip the wait on CPU if possible to avoid a stall + queue_->vdev()->flush(queue_->GetSubmittionBatch()); + } // The batch will be tracked with the marker now queue_->ResetSubmissionBatch(); } else { From a6e6b2a4a6c8214e50459ad3680b70acb1729f05 Mon Sep 17 00:00:00 2001 From: Chauncey Hui Date: Sat, 5 Jun 2021 03:00:06 -0400 Subject: [PATCH 014/102] SWDEV-2 - Change OpenCL version number from 3319 to 3320 --- utils/versions.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/versions.hpp b/utils/versions.hpp index 66da8d420..7e023750a 100644 --- a/utils/versions.hpp +++ b/utils/versions.hpp @@ -28,7 +28,7 @@ #endif // AMD_PLATFORM_NAME #ifndef AMD_PLATFORM_BUILD_NUMBER -#define AMD_PLATFORM_BUILD_NUMBER 3319 +#define AMD_PLATFORM_BUILD_NUMBER 3320 #endif // AMD_PLATFORM_BUILD_NUMBER #ifndef AMD_PLATFORM_REVISION_NUMBER From 0b8c154d6ebff6fbc8cabbb8c55628395d4bac19 Mon Sep 17 00:00:00 2001 From: Vladislav Sytchenko Date: Thu, 3 Jun 2021 16:59:37 -0400 Subject: [PATCH 015/102] SWDEV-289548 - [PAL] Revive Raven 2 support Revert back to using the Raven (gfx902) target ID for Raven 2 (gfx909). This is due to the HSAIL compiler not supporting gfx909. In theory there should be no issue with running Raven isa on Raven 2. 
Change-Id: I425edebc99075799eda5522fad231b8fb3184873 --- device/device.cpp | 6 +++--- device/pal/paldevice.cpp | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/device/device.cpp b/device/device.cpp index 9e14d4969..bc951d9d6 100644 --- a/device/device.cpp +++ b/device/device.cpp @@ -158,9 +158,9 @@ std::pair Isa::supportedIsas() { {"gfx908:sramecc-:xnack+", nullptr, true, false, false, 9, 0, 8, OFF, ON, 4, 16, 1, 256, 64 * Ki, 32}, {"gfx908:sramecc+:xnack-", nullptr, true, false, false, 9, 0, 8, ON, OFF, 4, 16, 1, 256, 64 * Ki, 32}, {"gfx908:sramecc+:xnack+", nullptr, true, false, false, 9, 0, 8, ON, ON, 4, 16, 1, 256, 64 * Ki, 32}, - {"gfx909", nullptr, false, false, false, 9, 0, 9, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32}, // Also Raven2 (can execute Raven code) - {"gfx909:xnack-", nullptr, false, false, false, 9, 0, 9, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32}, - {"gfx909:xnack+", nullptr, false, false, false, 9, 0, 9, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32}, + {"gfx902", "gfx903", false, true, false, 9, 0, 2, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32}, // Also Raven2 (can execute Raven code) + {"gfx902:xnack-", "gfx902", false, true, false, 9, 0, 2, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32}, + {"gfx902:xnack+", "gfx902", false, true, false, 9, 0, 2, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32}, {"gfx90a", nullptr, true, false, false, 9, 0, 10, ANY, ANY, 4, 16, 1, 256, 64 * Ki, 32}, {"gfx90a:sramecc-", nullptr, true, false, false, 9, 0, 10, OFF, ANY, 4, 16, 1, 256, 64 * Ki, 32}, {"gfx90a:sramecc+", nullptr, true, false, false, 9, 0, 10, ON, ANY, 4, 16, 1, 256, 64 * Ki, 32}, diff --git a/device/pal/paldevice.cpp b/device/pal/paldevice.cpp index 2c2b3964f..dbb73c0fb 100644 --- a/device/pal/paldevice.cpp +++ b/device/pal/paldevice.cpp @@ -93,7 +93,7 @@ static constexpr PalDevice supportedPalDevices[] = { {9, 0, 2, Pal::GfxIpLevel::GfxIp9, "gfx902", Pal::AsicRevision::Raven}, {9, 0, 4, Pal::GfxIpLevel::GfxIp9, "gfx904", Pal::AsicRevision::Vega12}, {9, 0, 6, 
Pal::GfxIpLevel::GfxIp9, "gfx906", Pal::AsicRevision::Vega20}, - {9, 0, 9, Pal::GfxIpLevel::GfxIp9, "gfx909", Pal::AsicRevision::Raven2}, + {9, 0, 2, Pal::GfxIpLevel::GfxIp9, "gfx902", Pal::AsicRevision::Raven2}, {9, 0, 12, Pal::GfxIpLevel::GfxIp9, "gfx90c", Pal::AsicRevision::Renoir}, {10, 1, 0, Pal::GfxIpLevel::GfxIp10_1, "gfx1010", Pal::AsicRevision::Navi10}, {10, 1, 1, Pal::GfxIpLevel::GfxIp10_1, "gfx1011", Pal::AsicRevision::Navi12}, From 322c00999919d303b44fedca75062e8ce1f701d6 Mon Sep 17 00:00:00 2001 From: Chauncey Hui Date: Sun, 6 Jun 2021 03:00:03 -0400 Subject: [PATCH 016/102] SWDEV-2 - Change OpenCL version number from 3320 to 3321 --- utils/versions.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/versions.hpp b/utils/versions.hpp index 7e023750a..9f5f4749e 100644 --- a/utils/versions.hpp +++ b/utils/versions.hpp @@ -28,7 +28,7 @@ #endif // AMD_PLATFORM_NAME #ifndef AMD_PLATFORM_BUILD_NUMBER -#define AMD_PLATFORM_BUILD_NUMBER 3320 +#define AMD_PLATFORM_BUILD_NUMBER 3321 #endif // AMD_PLATFORM_BUILD_NUMBER #ifndef AMD_PLATFORM_REVISION_NUMBER From 7c0b9562dc51f4b9a32fe562fd6becffd932c069 Mon Sep 17 00:00:00 2001 From: German Andryeyev Date: Mon, 7 Jun 2021 15:56:47 -0400 Subject: [PATCH 017/102] SWDEV-284671 - Add HW event wait to improve hipDeviceSynchronize If AMD event contains a reference to a HW event, then runtime could check/wait for HW event. CPU status update will occur later after HSA signal callback, but it's not important for the result. 
Change-Id: I591391a953bbdba6a25ac07e2cd98aeb17cd4596 --- device/device.hpp | 9 +++++++++ device/rocm/rocdevice.cpp | 15 +++++++++++++++ device/rocm/rocdevice.hpp | 8 +++++++- platform/command.cpp | 5 ++++- platform/commandqueue.cpp | 10 +++++----- 5 files changed, 40 insertions(+), 7 deletions(-) diff --git a/device/device.hpp b/device/device.hpp index 991cfc4bc..53c2e39fc 100644 --- a/device/device.hpp +++ b/device/device.hpp @@ -1696,6 +1696,15 @@ class Device : public RuntimeObject { cl_set_device_clock_mode_output_amd* pSetClockModeOutput) { return true; }; + + // Returns the status of HW event, associated with amd::Event + virtual bool IsHwEventReady( + const amd::Event& event, //!< AMD event for HW status validation + bool wait = false //!< If true then forces the event completion + ) const { + return false; + }; + //! Returns TRUE if the device is available for computations bool isOnline() const { return online_; } diff --git a/device/rocm/rocdevice.cpp b/device/rocm/rocdevice.cpp index 512f8e74f..3ece9bfcf 100644 --- a/device/rocm/rocdevice.cpp +++ b/device/rocm/rocdevice.cpp @@ -2517,6 +2517,20 @@ bool Device::SetClockMode(const cl_set_device_clock_mode_input_amd setClockModeI return result; } +// ================================================================================================ +bool Device::IsHwEventReady(const amd::Event& event, bool wait) const { + void* hw_event = (event.NotifyEvent() != nullptr) ? 
+ event.NotifyEvent()->HwEvent() : event.HwEvent(); + if (hw_event == nullptr) { + return false; + } else if (wait) { + WaitForSignal(reinterpret_cast(hw_event)->signal_); + return true; + } + return (hsa_signal_load_relaxed(reinterpret_cast(hw_event)->signal_) <= 0); +} + +// ================================================================================================ static void callbackQueue(hsa_status_t status, hsa_queue_t* queue, void* data) { if (status != HSA_STATUS_SUCCESS && status != HSA_STATUS_INFO_BREAK) { // Abort on device exceptions. @@ -2528,6 +2542,7 @@ static void callbackQueue(hsa_status_t status, hsa_queue_t* queue, void* data) { } } +// ================================================================================================ hsa_queue_t* Device::getQueueFromPool(const uint qIndex) { if (qIndex < QueuePriority::Total && queuePool_[qIndex].size() > 0) { typedef decltype(queuePool_)::value_type::const_reference PoolRef; diff --git a/device/rocm/rocdevice.hpp b/device/rocm/rocdevice.hpp index b3d009c52..78cfbb624 100644 --- a/device/rocm/rocdevice.hpp +++ b/device/rocm/rocdevice.hpp @@ -232,7 +232,11 @@ class NullDevice : public amd::Device { return true; } - virtual bool SetClockMode(const cl_set_device_clock_mode_input_amd setClockModeInput, cl_set_device_clock_mode_output_amd* pSetClockModeOutput) { return true; } + virtual bool SetClockMode( + const cl_set_device_clock_mode_input_amd setClockModeInput, + cl_set_device_clock_mode_output_amd* pSetClockModeOutput) { return true; } + + virtual bool IsHwEventReady(const amd::Event& event, bool wait = false) const { return false; } protected: //! Initialize compiler instance and handle @@ -400,6 +404,8 @@ class Device : public NullDevice { virtual bool SetClockMode(const cl_set_device_clock_mode_input_amd setClockModeInput, cl_set_device_clock_mode_output_amd* pSetClockModeOutput); + virtual bool IsHwEventReady(const amd::Event& event, bool wait = false) const; + //! 
Allocate host memory in terms of numa policy set by user void* hostNumaAlloc(size_t size, size_t alignment, bool atomics = false) const; diff --git a/platform/command.cpp b/platform/command.cpp index 7fccab9e6..389a73ad0 100644 --- a/platform/command.cpp +++ b/platform/command.cpp @@ -65,6 +65,10 @@ Event::~Event() { delete callback; callback = next; } + // Release the notify event + if (notify_event_ != nullptr) { + notify_event_->release(); + } } // ================================================================================================ @@ -272,7 +276,6 @@ bool Event::notifyCmdQueue() { } ClPrint(LOG_DEBUG, LOG_CMD, "queue marker to command queue: %p", queue); command->enqueue(); - command->release(); // Save notification, associated with the current event notify_event_ = command; } diff --git a/platform/commandqueue.cpp b/platform/commandqueue.cpp index 358e79df7..85bb08979 100644 --- a/platform/commandqueue.cpp +++ b/platform/commandqueue.cpp @@ -107,10 +107,6 @@ void HostQueue::finish() { Command* command = nullptr; if (IS_HIP) { command = getLastQueuedCommand(true); - if (nullptr != command) { - command->awaitCompletion(); - command->release(); - } } if (nullptr == command) { // Send a finish to make sure we finished all commands @@ -120,9 +116,13 @@ void HostQueue::finish() { } ClPrint(LOG_DEBUG, LOG_CMD, "marker is queued"); command->enqueue(); + } + // Check HW status of the ROCcrl event. 
Note: not all ROCclr modes support HW status + static constexpr bool kWaitCompletion = true; + if (!device().IsHwEventReady(command->event(), kWaitCompletion)) { command->awaitCompletion(); - command->release(); } + command->release(); ClPrint(LOG_DEBUG, LOG_CMD, "All commands finished"); } From 512ff9e7cef2d9af1be1d5513e23a4a984d6056e Mon Sep 17 00:00:00 2001 From: Christophe Paquot Date: Mon, 7 Jun 2021 14:14:08 -0700 Subject: [PATCH 018/102] SWDEV-276396 - Implement hipDeviceReset Add a Purge function to MemObjMap Change-Id: Iac51dfda9a7b7c45f2f4a0dc35f7a623121aba1a --- device/device.cpp | 14 ++++++++++++++ device/device.hpp | 1 + 2 files changed, 15 insertions(+) diff --git a/device/device.cpp b/device/device.cpp index bc951d9d6..0f163a267 100644 --- a/device/device.cpp +++ b/device/device.cpp @@ -327,6 +327,20 @@ void MemObjMap::UpdateAccess(amd::Device *peerDev) { } } +void MemObjMap::Purge(amd::Device* dev) { + assert(dev != nullptr); + + amd::ScopedLock lock(AllocatedLock_); + for (auto it = MemObjMap_.cbegin() ; it != MemObjMap_.cend() ;) { + const std::vector& devices = it->second->getContext().devices(); + if (devices.size() == 1 && devices[0] == dev) { + it = MemObjMap_.erase(it); + } else { + ++it; + } + } +} + Device::BlitProgram::~BlitProgram() { if (program_ != nullptr) { program_->release(); diff --git a/device/device.hpp b/device/device.hpp index 53c2e39fc..71ebf5ea4 100644 --- a/device/device.hpp +++ b/device/device.hpp @@ -1238,6 +1238,7 @@ class MemObjMap : public AllStatic { static amd::Memory* FindMemObj( const void* k); //!< find the mem object based on the input pointer static void UpdateAccess(amd::Device *peerDev); + static void Purge(amd::Device*dev); //!< Purge all the memories on the given device private: static std::map MemObjMap_; //!< the mem object<->hostptr information container From ee3eaeb71d7932644c208b88a8d299185fdac055 Mon Sep 17 00:00:00 2001 From: "Arya.Rafii" Date: Wed, 9 Jun 2021 17:02:32 -0400 Subject: [PATCH 019/102] 
SWDEV-289250 - Should return a nullptr for PAL so we can assume it's a host ptr Change-Id: I530eb39104bbe727c3e38186f6db4e64285b3fc8 --- device/device.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/device/device.hpp b/device/device.hpp index 71ebf5ea4..29346d238 100644 --- a/device/device.hpp +++ b/device/device.hpp @@ -1834,7 +1834,6 @@ class Device : public RuntimeObject { void SetActiveWait(bool state) { activeWait_ = state; } virtual amd::Memory* GetArenaMemObj(const void* ptr, size_t& offset) { - ShouldNotReachHere(); return nullptr; } From d1bbfabc8ebb8f1fcf18f4a089de438a9f64b8fa Mon Sep 17 00:00:00 2001 From: Chauncey Hui Date: Thu, 10 Jun 2021 03:00:07 -0400 Subject: [PATCH 020/102] SWDEV-2 - Change OpenCL version number from 3321 to 3322 --- utils/versions.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/versions.hpp b/utils/versions.hpp index 9f5f4749e..63a1169d6 100644 --- a/utils/versions.hpp +++ b/utils/versions.hpp @@ -28,7 +28,7 @@ #endif // AMD_PLATFORM_NAME #ifndef AMD_PLATFORM_BUILD_NUMBER -#define AMD_PLATFORM_BUILD_NUMBER 3321 +#define AMD_PLATFORM_BUILD_NUMBER 3322 #endif // AMD_PLATFORM_BUILD_NUMBER #ifndef AMD_PLATFORM_REVISION_NUMBER From 154fb7949b44f27b4908ec28978e06db2b5e1525 Mon Sep 17 00:00:00 2001 From: Saleel Kudchadker Date: Wed, 9 Jun 2021 00:08:02 -0700 Subject: [PATCH 021/102] SWDEV-247372 - Active wait timeout env var - Create an env var ROC_ACTIVE_WAIT_TIMEOUT to set active wait timeout - Record profiling information if marker_ts_ property is valid.
Change-Id: If0d8aec8d9b0715027cf0f7c3dc8a4c722a6bae6 --- device/rocm/rocvirtual.cpp | 9 +++++--- device/rocm/rocvirtual.hpp | 42 ++++++++++++++++++++++++-------------- utils/flags.hpp | 4 ++-- 3 files changed, 35 insertions(+), 20 deletions(-) diff --git a/device/rocm/rocvirtual.cpp b/device/rocm/rocvirtual.cpp index e8e21a5c7..e486c43be 100644 --- a/device/rocm/rocvirtual.cpp +++ b/device/rocm/rocvirtual.cpp @@ -122,7 +122,7 @@ void Timestamp::checkGpuTime() { } // Avoid profiling data for the sync barrier, in tiny performance tests the first call // to ROCr is very slow and that also affects the overall performance of the callback thread - if (command().GetBatchHead() == nullptr) { + if (command().GetBatchHead() == nullptr || command().profilingInfo().marker_ts_) { hsa_amd_profiling_dispatch_time_t time = {}; if (it->engine_ == HwQueueEngine::Compute) { hsa_amd_profiling_get_dispatch_time(gpu()->gpu_device(), it->signal_, &time); @@ -132,8 +132,11 @@ void Timestamp::checkGpuTime() { time.start = time_sdma.start; time.end = time_sdma.end; } + start = std::min(time.start, start); end = std::max(time.end, end); + ClPrint(amd::LOG_INFO, amd::LOG_SIG, "Signal = (0x%lx), start = %ld, " + "end = %ld", it->signal_.handle, start, end); } it->ts_ = nullptr; it->done_ = true; @@ -452,8 +455,8 @@ std::vector& VirtualGPU::HwQueueTracker::WaitingSignal(HwQueueEngi // Early signal status check if (hsa_signal_load_relaxed(external_signals_[i]->signal_) > 0) { const Settings& settings = gpu_.dev().settings(); - // Actively wait on CPU for 750 us to avoid extra overheads of signal tracking on GPU - if (!WaitForSignal(external_signals_[i]->signal_)) { + // Actively wait on CPU to avoid extra overheads of signal tracking on GPU + if (!WaitForSignal(external_signals_[i]->signal_)) { if (settings.cpu_wait_for_signal_) { // Wait on CPU for completion if requested CpuWaitForSignal(external_signals_[i]); diff --git a/device/rocm/rocvirtual.hpp b/device/rocm/rocvirtual.hpp index 
f39d901bf..4d40023d9 100644 --- a/device/rocm/rocvirtual.hpp +++ b/device/rocm/rocvirtual.hpp @@ -52,30 +52,42 @@ struct ProfilingSignal : public amd::HeapObject { constexpr static hsa_signal_value_t kInitSignalValueOne = 1; // Timeouts for HSA signal wait -constexpr static uint64_t kTimeout100us = 100000; -constexpr static uint64_t kTimeout750us = 750000; +constexpr static uint64_t kTimeout100us = 100 * K; constexpr static uint64_t kUnlimitedWait = std::numeric_limits::max(); -template +template inline bool WaitForSignal(hsa_signal_t signal) { - if (wait_time != 0) { - if (hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, kInitSignalValueOne, - wait_time, HSA_WAIT_STATE_ACTIVE) != 0) { - return false; - } - } else { - uint64_t timeout = (ROC_ACTIVE_WAIT) ? kUnlimitedWait : kTimeout100us; + if (hsa_signal_load_relaxed(signal) > 0) { + if (active_wait_timeout) { + uint64_t timeout = ROC_ACTIVE_WAIT_TIMEOUT * K; + if (timeout == 0) { + return false; + } + ClPrint(amd::LOG_INFO, amd::LOG_SIG, "Host active wait for Signal = (0x%lx) for %d us", + signal.handle, ROC_ACTIVE_WAIT_TIMEOUT); - // Active wait with a timeout - if (hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, kInitSignalValueOne, - timeout, HSA_WAIT_STATE_ACTIVE) != 0) { - // Wait until the completion with CPU suspend if (hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, kInitSignalValueOne, - kUnlimitedWait, HSA_WAIT_STATE_BLOCKED) != 0) { + timeout, HSA_WAIT_STATE_ACTIVE) != 0) { return false; } + } else { + + uint64_t timeout = kTimeout100us; + ClPrint(amd::LOG_INFO, amd::LOG_SIG, "Host wait until Signal = (0x%lx) decremented", + signal.handle); + + // Active wait with a timeout + if (hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, kInitSignalValueOne, + timeout, HSA_WAIT_STATE_ACTIVE) != 0) { + // Wait until the completion with CPU suspend + if (hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, kInitSignalValueOne, + kUnlimitedWait, HSA_WAIT_STATE_BLOCKED) 
!= 0) { + return false; + } + } } } + return true; } diff --git a/utils/flags.hpp b/utils/flags.hpp index 50c0b23f5..a9d7edf00 100644 --- a/utils/flags.hpp +++ b/utils/flags.hpp @@ -233,8 +233,8 @@ release(uint, HIP_HIDDEN_FREE_MEM, 0, \ "0 = Disable") \ release(size_t, GPU_FORCE_BLIT_COPY_SIZE, 0, \ "Size in KB of the threshold below which to force blit instead for sdma") \ -release(bool, ROC_ACTIVE_WAIT, false, \ - "Forces unconditional active wait for GPU") \ +release(uint, ROC_ACTIVE_WAIT_TIMEOUT, 750, \ + "Forces active wait of GPU interrup for the timeout(us)") \ release(bool, ROC_ENABLE_LARGE_BAR, true, \ "Enable Large Bar if supported by the device") \ release(bool, ROC_CPU_WAIT_FOR_SIGNAL, true, \ From c841045ed4ce22575df1c31aa3ea265a9fec4f95 Mon Sep 17 00:00:00 2001 From: Alex Xie Date: Wed, 5 May 2021 15:23:02 -0400 Subject: [PATCH 022/102] SWDEV-290306 - [LNX][Navi24][mainline]clinfo test failed on Navi24 Add Navi 24 support Change-Id: I7343384cf6fb8c532321e57e202c196ef054f459 --- device/device.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/device/device.cpp b/device/device.cpp index 0f163a267..96101ac03 100644 --- a/device/device.cpp +++ b/device/device.cpp @@ -185,6 +185,7 @@ std::pair Isa::supportedIsas() { {"gfx1030", "gfx1030", true, true, false, 10, 3, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32}, {"gfx1031", "gfx1031", true, true, false, 10, 3, 1, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32}, {"gfx1032", "gfx1032", true, true, false, 10, 3, 2, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32}, + {"gfx1034", "gfx1034", true, false, false, 10, 3, 4, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32} }; return std::make_pair(std::begin(supportedIsas_), std::end(supportedIsas_)); } From b79ae29737ee82e7d04ae927bec55ad29e286e1a Mon Sep 17 00:00:00 2001 From: Saleel Kudchadker Date: Thu, 10 Jun 2021 10:55:32 -0700 Subject: [PATCH 023/102] SWDEV-286092 - Use Barrier Header for event Change-Id: I9701fbab587e2ea31e58449e8c8b07341a7aa161 --- device/rocm/rocvirtual.cpp | 10 
++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/device/rocm/rocvirtual.cpp b/device/rocm/rocvirtual.cpp index e486c43be..78ebece0d 100644 --- a/device/rocm/rocvirtual.cpp +++ b/device/rocm/rocvirtual.cpp @@ -2920,14 +2920,8 @@ void VirtualGPU::submitMarker(amd::Marker& vcmd) { if (vcmd.profilingInfo().marker_ts_) { profilingBegin(vcmd); if (timestamp_ != nullptr) { - // If there was a pending dispatch use a Barrier packet - // with cache flushes. This saves on additional barrier - // for cache flushes explicitly and helps wall time - dispatchBarrierPacket(kNopPacketHeader); - // Direct dispatch requires a barrier with callback and hasPendingDispatch_ triggers that - if (AMD_DIRECT_DISPATCH) { - hasPendingDispatch_ = true; - } + // Submit a barrier with a cache flushes. + dispatchBarrierPacket(kBarrierPacketHeader); } profilingEnd(vcmd); } From 282e1918818d8bb346e6d7702b1ddb592b02bbb7 Mon Sep 17 00:00:00 2001 From: German Andryeyev Date: Thu, 10 Jun 2021 14:49:14 -0400 Subject: [PATCH 024/102] SWDEV-290371 - Add lock protection for signal Add lock protection for signal processing If signal is reused, then disable reference to it from HIP Increase the pool signal size to 32 Change-Id: I7d529b35910f83ce577c9eca6d3386759611ccc0 --- device/rocm/rocvirtual.cpp | 14 +++++++++++--- device/rocm/rocvirtual.hpp | 7 +++++-- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/device/rocm/rocvirtual.cpp b/device/rocm/rocvirtual.cpp index 78ebece0d..8e5f3e9dc 100644 --- a/device/rocm/rocvirtual.cpp +++ b/device/rocm/rocvirtual.cpp @@ -109,12 +109,14 @@ static unsigned extractAqlBits(unsigned v, unsigned pos, unsigned width) { }; // ================================================================================================ -void Timestamp::checkGpuTime() { +void Timestamp::checkGpuTime(bool event_recycle) { if (HwProfiling()) { uint64_t start = std::numeric_limits::max(); uint64_t end = 0; for (auto it : signals_) { + amd::ScopedLock 
lock(it->LockSignalOps()); + // Ignore the wait if runtime processes API callback, because the signal value is bigger // than expected and the value reset will occur after API callback is done if (GetCallbackSignal().handle == 0) { @@ -138,6 +140,10 @@ void Timestamp::checkGpuTime() { ClPrint(amd::LOG_INFO, amd::LOG_SIG, "Signal = (0x%lx), start = %ld, " "end = %ld", it->signal_.handle, start, end); } + // The signal is reused and the upper layer can't rely on it. + if (event_recycle) { + const_cast(it->ts_->command_).SetHwEvent(nullptr); + } it->ts_ = nullptr; it->done_ = true; } @@ -325,7 +331,7 @@ VirtualGPU::HwQueueTracker::~HwQueueTracker() { // ================================================================================================ bool VirtualGPU::HwQueueTracker::Create() { - constexpr size_t kSignalListSize = 16; + constexpr size_t kSignalListSize = 32; signal_list_.resize(kSignalListSize); hsa_agent_t agent = gpu_.gpu_device(); @@ -475,11 +481,13 @@ std::vector& VirtualGPU::HwQueueTracker::WaitingSignal(HwQueueEngi // ================================================================================================ bool VirtualGPU::HwQueueTracker::CpuWaitForSignal(ProfilingSignal* signal) { + amd::ScopedLock lock(signal->LockSignalOps()); // Wait for the current signal if (!signal->done_) { // Update timestamp values if requested if (signal->ts_ != nullptr) { - signal->ts_->checkGpuTime(); + static constexpr bool kEventRecycle = true; + signal->ts_->checkGpuTime(kEventRecycle); } else { ClPrint(amd::LOG_DEBUG, amd::LOG_COPY, "[%zx]!\t Host wait on completion_signal=0x%zx", std::this_thread::get_id(), signal->signal_.handle); diff --git a/device/rocm/rocvirtual.hpp b/device/rocm/rocvirtual.hpp index 4d40023d9..aaa05acf6 100644 --- a/device/rocm/rocvirtual.hpp +++ b/device/rocm/rocvirtual.hpp @@ -37,15 +37,18 @@ class Memory; class Timestamp; struct ProfilingSignal : public amd::HeapObject { + amd::Monitor lock_; //!< Signal lock for update 
hsa_signal_t signal_; //!< HSA signal to track profiling information Timestamp* ts_; //!< Timestamp object associated with the signal HwQueueEngine engine_; //!< Engine used with this signal bool done_; //!< True if signal is done ProfilingSignal() - : ts_(nullptr) + : lock_("Signal Ops Lock", true) + , ts_(nullptr) , engine_(HwQueueEngine::Compute) , done_(true) { signal_.handle = 0; } + amd::Monitor& LockSignalOps() { return lock_; } }; // Initial HSA signal value @@ -136,7 +139,7 @@ class Timestamp : public amd::HeapObject { const bool HwProfiling() const { return !signals_.empty(); } //! Finds execution ticks on GPU - void checkGpuTime(); + void checkGpuTime(bool event_recycle = false); // Start a timestamp (get timestamp from OS) void start() { start_ = amd::Os::timeNanos(); } From 4c2e7cace7a7a95fcc80bbc7f144b583cbcfd3e3 Mon Sep 17 00:00:00 2001 From: Aaron Liu Date: Fri, 11 Jun 2021 08:15:06 +0800 Subject: [PATCH 025/102] SWDEV-290474 - [Lnx][VanGogh] Add VanGogh support Signed-off-by: Aaron Liu Reviewed-by: Christophe Paquot Change-Id: Iff0253a181bbfc1984304014a9e3b542b2556635 --- device/device.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/device/device.cpp b/device/device.cpp index 96101ac03..bec70e614 100644 --- a/device/device.cpp +++ b/device/device.cpp @@ -185,6 +185,7 @@ std::pair Isa::supportedIsas() { {"gfx1030", "gfx1030", true, true, false, 10, 3, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32}, {"gfx1031", "gfx1031", true, true, false, 10, 3, 1, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32}, {"gfx1032", "gfx1032", true, true, false, 10, 3, 2, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32}, + {"gfx1033", "gfx1033", true, false, false, 10, 3, 3, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32}, {"gfx1034", "gfx1034", true, false, false, 10, 3, 4, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32} }; return std::make_pair(std::begin(supportedIsas_), std::end(supportedIsas_)); From 6c41e69e669771181b068720fd701e810b11838e Mon Sep 17 00:00:00 2001 From: Chauncey Hui Date: Fri, 11 
Jun 2021 03:00:09 -0400 Subject: [PATCH 026/102] SWDEV-2 - Change OpenCL version number from 3322 to 3323 --- utils/versions.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/versions.hpp b/utils/versions.hpp index 63a1169d6..f23088db9 100644 --- a/utils/versions.hpp +++ b/utils/versions.hpp @@ -28,7 +28,7 @@ #endif // AMD_PLATFORM_NAME #ifndef AMD_PLATFORM_BUILD_NUMBER -#define AMD_PLATFORM_BUILD_NUMBER 3322 +#define AMD_PLATFORM_BUILD_NUMBER 3323 #endif // AMD_PLATFORM_BUILD_NUMBER #ifndef AMD_PLATFORM_REVISION_NUMBER From 0a9065b0e3767012f37909215bcd4ad58e1d34df Mon Sep 17 00:00:00 2001 From: Saleel Kudchadker Date: Mon, 14 Jun 2021 14:25:30 -0700 Subject: [PATCH 027/102] SWDEV-247372 - Reset hasPendingDispatch Reset hasPendingDispatch_ if we insert barrier for time marker. Change-Id: Id038fd4e1c910c0a657978fee00630e49c372321 --- device/rocm/rocvirtual.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/device/rocm/rocvirtual.cpp b/device/rocm/rocvirtual.cpp index 8e5f3e9dc..2a832d26e 100644 --- a/device/rocm/rocvirtual.cpp +++ b/device/rocm/rocvirtual.cpp @@ -2930,6 +2930,8 @@ void VirtualGPU::submitMarker(amd::Marker& vcmd) { if (timestamp_ != nullptr) { // Submit a barrier with a cache flushes. 
dispatchBarrierPacket(kBarrierPacketHeader); + // Reset this flag since we already enable system scope for kBarrierPacketHeader + hasPendingDispatch_ = false; } profilingEnd(vcmd); } From 5129eae3a54e50e6c180973600d1c6440cadaad7 Mon Sep 17 00:00:00 2001 From: Chauncey Hui Date: Tue, 15 Jun 2021 03:00:06 -0400 Subject: [PATCH 028/102] SWDEV-2 - Change OpenCL version number from 3323 to 3324 --- utils/versions.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/versions.hpp b/utils/versions.hpp index f23088db9..8c94dabfc 100644 --- a/utils/versions.hpp +++ b/utils/versions.hpp @@ -28,7 +28,7 @@ #endif // AMD_PLATFORM_NAME #ifndef AMD_PLATFORM_BUILD_NUMBER -#define AMD_PLATFORM_BUILD_NUMBER 3323 +#define AMD_PLATFORM_BUILD_NUMBER 3324 #endif // AMD_PLATFORM_BUILD_NUMBER #ifndef AMD_PLATFORM_REVISION_NUMBER From 995513a1e8b2f2901a750faba8fa9d1db881e3f1 Mon Sep 17 00:00:00 2001 From: Sourabh Betigeri Date: Fri, 4 Jun 2021 15:51:48 -0700 Subject: [PATCH 029/102] SWDEV-286446 - This patch enables stream operations on vega10, vega20, MI100 and MI200 Change-Id: I6f07036d8ee6e4c6b55196a13288f8107488d824 --- device/rocm/rocdevice.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/device/rocm/rocdevice.cpp b/device/rocm/rocdevice.cpp index 3ece9bfcf..711d782ea 100644 --- a/device/rocm/rocdevice.cpp +++ b/device/rocm/rocdevice.cpp @@ -1523,7 +1523,9 @@ bool Device::populateOCLDeviceConstants() { info_.cooperativeMultiDeviceGroups_ = settings().enableCoopMultiDeviceGroups_; // TODO: Update this to use HSA API when it is ready. 
For now limiting this to gfx9 - info_.aqlBarrierValue_ = (isa().versionMajor() == 9 && isa().versionMinor() == 0); + info_.aqlBarrierValue_ = (isa().versionMajor() == 9 && isa().versionMinor() == 0 && + (isa().versionStepping() == 0 || isa().versionStepping() == 4 || + isa().versionStepping() == 8 || isa().versionStepping() == 10)); } info_.maxPipePacketSize_ = info_.maxMemAllocSize_; From be3ec5cad71f647dbe91c27a8f6c72226edd417e Mon Sep 17 00:00:00 2001 From: Chauncey Hui Date: Wed, 16 Jun 2021 03:00:08 -0400 Subject: [PATCH 030/102] SWDEV-2 - Change OpenCL version number from 3324 to 3325 --- utils/versions.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/versions.hpp b/utils/versions.hpp index 8c94dabfc..0e8ad78f6 100644 --- a/utils/versions.hpp +++ b/utils/versions.hpp @@ -28,7 +28,7 @@ #endif // AMD_PLATFORM_NAME #ifndef AMD_PLATFORM_BUILD_NUMBER -#define AMD_PLATFORM_BUILD_NUMBER 3324 +#define AMD_PLATFORM_BUILD_NUMBER 3325 #endif // AMD_PLATFORM_BUILD_NUMBER #ifndef AMD_PLATFORM_REVISION_NUMBER From b15799caa511e8e4fdd853410f1c0555c38aace6 Mon Sep 17 00:00:00 2001 From: German Andryeyev Date: Tue, 15 Jun 2021 23:13:27 -0400 Subject: [PATCH 031/102] SWDEV-290160 - Don't send notification for batch markers Batch marker already has a barrier with HSA signal callback Change-Id: I69fc63d72320c2e9cc2d2e59ebd3f07c0bd0e3b5 --- device/rocm/rocvirtual.cpp | 3 ++- platform/command.cpp | 12 +++--------- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/device/rocm/rocvirtual.cpp b/device/rocm/rocvirtual.cpp index 2a832d26e..5f2659c76 100644 --- a/device/rocm/rocvirtual.cpp +++ b/device/rocm/rocvirtual.cpp @@ -1239,7 +1239,8 @@ void VirtualGPU::profilingEnd(amd::Command& command) { timestamp_->end(); } command.setData(timestamp_); - if (AMD_DIRECT_DISPATCH) { + // Update HW event only for batches + if ((AMD_DIRECT_DISPATCH) && (command.GetBatchHead() != nullptr)) { command.SetHwEvent(timestamp_->Signals().back()); } timestamp_ = 
nullptr; diff --git a/platform/command.cpp b/platform/command.cpp index 389a73ad0..ed0c5fa0f 100644 --- a/platform/command.cpp +++ b/platform/command.cpp @@ -259,15 +259,9 @@ bool Event::notifyCmdQueue() { HostQueue* queue = command().queue(); if ((status() > CL_COMPLETE) && (nullptr != queue) && (!AMD_DIRECT_DISPATCH || - // Don't need to notify any marker with direct dispatch, - // because all markers are blocking. - ((command().type() != CL_COMMAND_MARKER) && - (command().type() != 0)) || - // Don't need to notify if the current batch is empty, - // because that means the command was processed and extra notification - // will cause a stall on the host. - (queue->GetSubmittionBatch() != nullptr)) && - !notified_.test_and_set()) { + // If HW event was assigned, then notification can be ignored, since a barrier was issued + (HwEvent() == nullptr)) && + !notified_.test_and_set()) { // Make sure the queue is draining the enqueued commands. amd::Command* command = new amd::Marker(*queue, false, nullWaitList, this); if (command == NULL) { From 115f7a3f1dda66080f9ed10791d024d3892d6c59 Mon Sep 17 00:00:00 2001 From: Christophe Paquot Date: Wed, 16 Jun 2021 09:13:58 -0700 Subject: [PATCH 032/102] SWDEV-240806 - Release resources in Command::terminate for HIP We do not want to release resources during setStatus in HIP because of Graphs Change-Id: Idc7b188ab5f8be6975ea91005dd2bbf177401f8c --- platform/command.cpp | 4 +++- platform/command.hpp | 5 ++++- platform/commandqueue.cpp | 9 ++++++++- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/platform/command.cpp b/platform/command.cpp index ed0c5fa0f..082deaacc 100644 --- a/platform/command.cpp +++ b/platform/command.cpp @@ -145,7 +145,9 @@ bool Event::setStatus(int32_t status, uint64_t timeStamp) { if (status <= CL_COMPLETE) { // Before we notify the waiters that this event reached the CL_COMPLETE // status, we release all the resources associated with this instance. 
- releaseResources(); + if (!IS_HIP) { + releaseResources(); + } activity_.ReportEventTimestamps(command()); // Broadcast all the waiters. diff --git a/platform/command.hpp b/platform/command.hpp index 21bce75a0..ec9843a1b 100644 --- a/platform/command.hpp +++ b/platform/command.hpp @@ -262,7 +262,10 @@ class Command : public Event { eventWaitList_(nullWaitList), commandWaitBits_(0) {} - bool terminate() { + virtual bool terminate() { + if (IS_HIP) { + releaseResources(); + } if (Agent::shouldPostEventEvents() && type() != 0) { Agent::postEventFree(as_cl(static_cast(this))); } diff --git a/platform/commandqueue.cpp b/platform/commandqueue.cpp index 85bb08979..998dcb504 100644 --- a/platform/commandqueue.cpp +++ b/platform/commandqueue.cpp @@ -123,6 +123,14 @@ void HostQueue::finish() { command->awaitCompletion(); } command->release(); + if (IS_HIP) { + ScopedLock sl(vdev()->execution()); + ScopedLock l(lastCmdLock_); + if (lastEnqueueCommand_ != nullptr) { + lastEnqueueCommand_->release(); + lastEnqueueCommand_ = nullptr; + } + } ClPrint(LOG_DEBUG, LOG_CMD, "All commands finished"); } @@ -238,7 +246,6 @@ Command* HostQueue::getLastQueuedCommand(bool retain) { // The batch update must be lock protected to avoid a race condition // when multiple threads submit/flush/update the batch at the same time ScopedLock sl(vdev()->execution()); - // Since the lastCmdLock_ is acquired, it is safe to read and retain the lastEnqueueCommand. // It is guaranteed that the pointer will not change. 
if (retain && lastEnqueueCommand_ != nullptr) { From 3352fc9e3440aa455f75096cf323a2b57c7e4fba Mon Sep 17 00:00:00 2001 From: Chauncey Hui Date: Thu, 17 Jun 2021 03:00:07 -0400 Subject: [PATCH 033/102] SWDEV-2 - Change OpenCL version number from 3325 to 3326 --- utils/versions.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/versions.hpp b/utils/versions.hpp index 0e8ad78f6..53251ffed 100644 --- a/utils/versions.hpp +++ b/utils/versions.hpp @@ -28,7 +28,7 @@ #endif // AMD_PLATFORM_NAME #ifndef AMD_PLATFORM_BUILD_NUMBER -#define AMD_PLATFORM_BUILD_NUMBER 3325 +#define AMD_PLATFORM_BUILD_NUMBER 3326 #endif // AMD_PLATFORM_BUILD_NUMBER #ifndef AMD_PLATFORM_REVISION_NUMBER From 91e48d763883e4691604560ec36bf319a305230c Mon Sep 17 00:00:00 2001 From: Saleel Kudchadker Date: Wed, 16 Jun 2021 22:11:26 -0700 Subject: [PATCH 034/102] SWDEV-247372 - Add logging for debug Change-Id: Id5a27034005a7deba37072d8a4c6f250104a96c8 --- device/rocm/rocdevice.cpp | 1 + platform/commandqueue.cpp | 1 + 2 files changed, 2 insertions(+) diff --git a/device/rocm/rocdevice.cpp b/device/rocm/rocdevice.cpp index 711d782ea..0551e0e98 100644 --- a/device/rocm/rocdevice.cpp +++ b/device/rocm/rocdevice.cpp @@ -2524,6 +2524,7 @@ bool Device::IsHwEventReady(const amd::Event& event, bool wait) const { void* hw_event = (event.NotifyEvent() != nullptr) ? event.NotifyEvent()->HwEvent() : event.HwEvent(); if (hw_event == nullptr) { + ClPrint(amd::LOG_INFO, amd::LOG_SIG, "No HW event"); return false; } else if (wait) { WaitForSignal(reinterpret_cast(hw_event)->signal_); diff --git a/platform/commandqueue.cpp b/platform/commandqueue.cpp index 998dcb504..ea80a82de 100644 --- a/platform/commandqueue.cpp +++ b/platform/commandqueue.cpp @@ -120,6 +120,7 @@ void HostQueue::finish() { // Check HW status of the ROCcrl event. 
Note: not all ROCclr modes support HW status static constexpr bool kWaitCompletion = true; if (!device().IsHwEventReady(command->event(), kWaitCompletion)) { + ClPrint(LOG_DEBUG, LOG_CMD, "HW Event not ready, awaiting completion instead"); command->awaitCompletion(); } command->release(); From 3e46c182309c53cd86f2eea9d256bb47dca08487 Mon Sep 17 00:00:00 2001 From: Chauncey Hui Date: Fri, 18 Jun 2021 03:00:45 -0400 Subject: [PATCH 035/102] SWDEV-2 - Change OpenCL version number from 3326 to 3327 --- utils/versions.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/versions.hpp b/utils/versions.hpp index 53251ffed..3b6cbf4e2 100644 --- a/utils/versions.hpp +++ b/utils/versions.hpp @@ -28,7 +28,7 @@ #endif // AMD_PLATFORM_NAME #ifndef AMD_PLATFORM_BUILD_NUMBER -#define AMD_PLATFORM_BUILD_NUMBER 3326 +#define AMD_PLATFORM_BUILD_NUMBER 3327 #endif // AMD_PLATFORM_BUILD_NUMBER #ifndef AMD_PLATFORM_REVISION_NUMBER From 990d056c59dcbdace10b8495f3fab24ed94b85ad Mon Sep 17 00:00:00 2001 From: Alex Xie Date: Tue, 15 Jun 2021 15:43:59 -0400 Subject: [PATCH 036/102] SWDEV-290122 - OpenGL tests fails with CL_MEM_ALLOCATION_ERROR Change-Id: I34d7063f05dd46c32cf59a88c455691ba5099679 --- device/pal/palresource.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/device/pal/palresource.cpp b/device/pal/palresource.cpp index 5148d47ec..2950e8a01 100644 --- a/device/pal/palresource.cpp +++ b/device/pal/palresource.cpp @@ -930,6 +930,9 @@ bool Resource::CreateInterop(CreateParams* params) { imgOpenInfo.swizzledFormat.swizzle = channels; imgOpenInfo.usage.shaderRead = true; imgOpenInfo.usage.shaderWrite = true; +#if defined(__unix__) + imgOpenInfo.resourceInfo.handleType = Pal::HandleType::DmaBufFd; +#endif memRef_ = GpuMemoryReference::Create(dev(), imgOpenInfo, &imgCreateInfo, &image_); if (nullptr == memRef_) { return false; From 18ff9bcb1c4b0c03403b467f3cedd50f3f7dd488 Mon Sep 17 00:00:00 2001 From: German Andryeyev Date: Fri, 18 Jun 2021 17:07:40 -0400 
Subject: [PATCH 037/102] SWDEV-290160 - Switch to global HSA signals Runtime can't assign internal HSA signals for HIP events, because HIP application can destroy the HIP stream or signal reuse may occur internally. Switch to global HSA signals for HIP events. Change-Id: Ieaea2d6b039e492b2e7c5112782a8f4e601e50a1 --- device/device.hpp | 2 + device/rocm/rocdevice.cpp | 37 ++++++++++ device/rocm/rocdevice.hpp | 19 +++++ device/rocm/rocvirtual.cpp | 137 ++++++++++++++++--------------------- device/rocm/rocvirtual.hpp | 25 ++----- platform/command.cpp | 61 +++++++++++------ platform/command.hpp | 1 + 7 files changed, 162 insertions(+), 120 deletions(-) diff --git a/device/device.hpp b/device/device.hpp index 29346d238..7c0e13db1 100644 --- a/device/device.hpp +++ b/device/device.hpp @@ -49,6 +49,7 @@ #include #include #include +#include #include namespace amd { @@ -1705,6 +1706,7 @@ class Device : public RuntimeObject { ) const { return false; }; + virtual void ReleaseGlobalSignal(void* signal) const {} //! 
Returns TRUE if the device is available for computations bool isOnline() const { return online_; } diff --git a/device/rocm/rocdevice.cpp b/device/rocm/rocdevice.cpp index 0551e0e98..c6e94732a 100644 --- a/device/rocm/rocdevice.cpp +++ b/device/rocm/rocdevice.cpp @@ -2919,6 +2919,7 @@ bool Device::findLinkInfo(const hsa_amd_memory_pool_t& pool, return true; } +// ================================================================================================ void Device::getGlobalCUMask(std::string cuMaskStr) { if (cuMaskStr.length() != 0) { std::string pre = cuMaskStr.substr(0, 2); @@ -2971,10 +2972,12 @@ void Device::getGlobalCUMask(std::string cuMaskStr) { } } +// ================================================================================================ device::Signal* Device::createSignal() const { return new roc::Signal(); } +// ================================================================================================ amd::Memory* Device::GetArenaMemObj(const void* ptr, size_t& offset) { // If arena_mem_obj_ is null, then HMM and Xnack is disabled. Return nullptr. if (arena_mem_obj_ == nullptr) { @@ -2989,5 +2992,39 @@ amd::Memory* Device::GetArenaMemObj(const void* ptr, size_t& offset) { return arena_mem_obj_; } +// ================================================================================================ +ProfilingSignal* Device::GetGlobalSignal(Timestamp* ts) const { + std::unique_ptr prof_signal(new ProfilingSignal()); + if (prof_signal != nullptr) { + hsa_agent_t agent = getBackendDevice(); + hsa_agent_t* agents = (settings().system_scope_signal_) ? nullptr : &agent; + uint32_t num_agents = (settings().system_scope_signal_) ? 
0 : 1; + + if (ts != 0) { + // Save HSA signal earlier to make sure the possible callback will have a valid + // value for processing + prof_signal->ts_ = ts; + ts->AddProfilingSignal(prof_signal.get()); + } + + if (HSA_STATUS_SUCCESS == hsa_signal_create(kInitSignalValueOne, + num_agents, agents, &prof_signal->signal_)) { + return prof_signal.release(); + } + } + return nullptr; +} + +// ================================================================================================ +void Device::ReleaseGlobalSignal(void* signal) const { + if (signal != nullptr) { + ProfilingSignal* prof_signal = reinterpret_cast(signal); + if (prof_signal->signal_.handle != 0) { + hsa_signal_destroy(prof_signal->signal_); + } + delete prof_signal; + } +} + } // namespace roc #endif // WITHOUT_HSA_BACKEND diff --git a/device/rocm/rocdevice.hpp b/device/rocm/rocdevice.hpp index 78cfbb624..0fb6f0b8e 100644 --- a/device/rocm/rocdevice.hpp +++ b/device/rocm/rocdevice.hpp @@ -77,6 +77,21 @@ class VirtualDevice; class PrintfDbg; class IProDevice; +struct ProfilingSignal : public amd::HeapObject { + hsa_signal_t signal_; //!< HSA signal to track profiling information + Timestamp* ts_; //!< Timestamp object associated with the signal + HwQueueEngine engine_; //!< Engine used with this signal + bool done_; //!< True if signal is done + amd::Monitor lock_; //!< Signal lock for update + ProfilingSignal() + : ts_(nullptr) + , engine_(HwQueueEngine::Compute) + , done_(true) + , lock_("Signal Ops Lock", true) + { signal_.handle = 0; } + amd::Monitor& LockSignalOps() { return lock_; } +}; + class Sampler : public device::Sampler { public: //! Constructor @@ -237,6 +252,7 @@ class NullDevice : public amd::Device { cl_set_device_clock_mode_output_amd* pSetClockModeOutput) { return true; } virtual bool IsHwEventReady(const amd::Event& event, bool wait = false) const { return false; } + virtual void ReleaseGlobalSignal(void* signal) const {} protected: //! 
Initialize compiler instance and handle @@ -405,6 +421,7 @@ class Device : public NullDevice { cl_set_device_clock_mode_output_amd* pSetClockModeOutput); virtual bool IsHwEventReady(const amd::Event& event, bool wait = false) const; + virtual void ReleaseGlobalSignal(void* signal) const; //! Allocate host memory in terms of numa policy set by user void* hostNumaAlloc(size_t size, size_t alignment, bool atomics = false) const; @@ -505,6 +522,8 @@ class Device : public NullDevice { virtual amd::Memory* GetArenaMemObj(const void* ptr, size_t& offset); + ProfilingSignal* GetGlobalSignal(Timestamp* ts) const; + private: bool create(); diff --git a/device/rocm/rocvirtual.cpp b/device/rocm/rocvirtual.cpp index 5f2659c76..d31f01982 100644 --- a/device/rocm/rocvirtual.cpp +++ b/device/rocm/rocvirtual.cpp @@ -109,7 +109,7 @@ static unsigned extractAqlBits(unsigned v, unsigned pos, unsigned width) { }; // ================================================================================================ -void Timestamp::checkGpuTime(bool event_recycle) { +void Timestamp::checkGpuTime() { if (HwProfiling()) { uint64_t start = std::numeric_limits::max(); uint64_t end = 0; @@ -140,10 +140,6 @@ void Timestamp::checkGpuTime(bool event_recycle) { ClPrint(amd::LOG_INFO, amd::LOG_SIG, "Signal = (0x%lx), start = %ld, " "end = %ld", it->signal_.handle, start, end); } - // The signal is reused and the upper layer can't rely on it. - if (event_recycle) { - const_cast(it->ts_->command_).SetHwEvent(nullptr); - } it->ts_ = nullptr; it->done_ = true; } @@ -390,23 +386,7 @@ hsa_signal_t VirtualGPU::HwQueueTracker::ActiveSignal( // If direct dispatch is enabled and the batch head isn't null, then it's a marker and // requires the batch update upon HSA signal completion if (AMD_DIRECT_DISPATCH && (ts->command().GetBatchHead() != nullptr)) { - uint32_t init_value = kInitSignalValueOne; - // If API callback is enabled, then use a blocking signal for AQL queue. 
- // HSA signal will be acquired in SW and released after HSA signal callback - if (ts->command().Callback() != nullptr) { - ts->SetCallbackSignal(prof_signal->signal_); - // Blocks AQL queue from further processing - hsa_signal_add_relaxed(prof_signal->signal_, 1); - init_value += 1; - } - hsa_status_t result = hsa_amd_signal_async_handler(prof_signal->signal_, - HSA_SIGNAL_CONDITION_LT, init_value, &HsaAmdSignalHandler, ts); - if (HSA_STATUS_SUCCESS != result) { - LogError("hsa_amd_signal_async_handler() failed to set the handler!"); - } else { - ClPrint(amd::LOG_INFO, amd::LOG_SIG, "Set Handler: handle(0x%lx), timestamp(%p)", - prof_signal->signal_.handle, prof_signal); - } + assert(false && "Runtime should not have batch command in ActiveSignal!"); } if (!sdma_profiling_) { hsa_amd_profiling_async_copy_enable(true); @@ -486,8 +466,7 @@ bool VirtualGPU::HwQueueTracker::CpuWaitForSignal(ProfilingSignal* signal) { if (!signal->done_) { // Update timestamp values if requested if (signal->ts_ != nullptr) { - static constexpr bool kEventRecycle = true; - signal->ts_->checkGpuTime(kEventRecycle); + signal->ts_->checkGpuTime(); } else { ClPrint(amd::LOG_DEBUG, amd::LOG_COPY, "[%zx]!\t Host wait on completion_signal=0x%zx", std::this_thread::get_id(), signal->signal_.handle); @@ -892,7 +871,8 @@ bool VirtualGPU::dispatchCounterAqlPacket(hsa_ext_amd_aql_pm4_packet_t* packet, } // ================================================================================================ -void VirtualGPU::dispatchBarrierPacket(uint16_t packetHeader, bool skipSignal) { +void VirtualGPU::dispatchBarrierPacket(uint16_t packetHeader, + bool skipSignal, const ProfilingSignal* global_signal) { const uint32_t queueSize = gpu_queue_->size; const uint32_t queueMask = queueSize - 1; @@ -915,12 +895,16 @@ void VirtualGPU::dispatchBarrierPacket(uint16_t packetHeader, bool skipSignal) { barrier_packet_.completion_signal.handle = 0; if (!skipSignal) { - // Pool size must grow to the size of 
pending AQL packets - const uint32_t pool_size = index - read; + if (global_signal != nullptr) { + barrier_packet_.completion_signal = global_signal->signal_; + } else { + // Pool size must grow to the size of pending AQL packets + const uint32_t pool_size = index - read; - // Get active signal for current dispatch if profiling is necessary - barrier_packet_.completion_signal = - Barriers().ActiveSignal(kInitSignalValueOne, timestamp_, pool_size); + // Get active signal for current dispatch if profiling is necessary + barrier_packet_.completion_signal = + Barriers().ActiveSignal(kInitSignalValueOne, timestamp_, pool_size); + } } while ((index - hsa_queue_load_read_index_scacquire(gpu_queue_)) >= queueMask); @@ -1218,7 +1202,8 @@ void VirtualGPU::profilingBegin(amd::Command& command, bool drmProfiling) { (*it)->NotifyEvent()->HwEvent() : (*it)->HwEvent(); if (hw_event != nullptr) { Barriers().AddExternalSignal(reinterpret_cast(hw_event)); - } else if (static_cast(*it)->queue() != command.queue()) { + } else if (static_cast(*it)->queue() != command.queue() && + ((*it)->status() != CL_COMPLETE)) { LogPrintfError("Waiting event(%p) doesn't have a HSA signal!\n", *it); } else { // Assume serialization on the same queue... 
@@ -1239,10 +1224,7 @@ void VirtualGPU::profilingEnd(amd::Command& command) { timestamp_->end(); } command.setData(timestamp_); - // Update HW event only for batches - if ((AMD_DIRECT_DISPATCH) && (command.GetBatchHead() != nullptr)) { - command.SetHwEvent(timestamp_->Signals().back()); - } + timestamp_ = nullptr; } } @@ -2926,13 +2908,46 @@ void VirtualGPU::submitNativeFn(amd::NativeFnCommand& cmd) { // ================================================================================================ void VirtualGPU::submitMarker(amd::Marker& vcmd) { - if (vcmd.profilingInfo().marker_ts_) { + if (AMD_DIRECT_DISPATCH || vcmd.profilingInfo().marker_ts_) { profilingBegin(vcmd); if (timestamp_ != nullptr) { + ProfilingSignal* prof_signal = nullptr; + // If direct dispatch is enabled and the batch head isn't null, then it's a marker and + // requires the batch update upon HSA signal completion + if (AMD_DIRECT_DISPATCH) { + assert(vcmd.GetBatchHead() != nullptr && "Marker doesn't have batch!"); + + prof_signal = dev().GetGlobalSignal(timestamp_); + prof_signal->done_ = false; + + assert(prof_signal != nullptr && "Failed to allocate the global HSA signal!"); + uint32_t init_value = kInitSignalValueOne; + // If API callback is enabled, then use a blocking signal for AQL queue. 
+ // HSA signal will be acquired in SW and released after HSA signal callback + if (vcmd.Callback() != nullptr) { + timestamp_->SetCallbackSignal(prof_signal->signal_); + // Blocks AQL queue from further processing + hsa_signal_add_relaxed(prof_signal->signal_, 1); + init_value += 1; + } + + hsa_status_t result = hsa_amd_signal_async_handler(prof_signal->signal_, + HSA_SIGNAL_CONDITION_LT, init_value, &HsaAmdSignalHandler, timestamp_); + if (HSA_STATUS_SUCCESS != result) { + LogError("hsa_amd_signal_async_handler() failed to set the handler!"); + } else { + ClPrint(amd::LOG_INFO, amd::LOG_SIG, "Set Handler: handle(0x%lx), timestamp(%p)", + prof_signal->signal_.handle, prof_signal); + } + // Update HW event only for batches + vcmd.SetHwEvent(timestamp_->Signals().back()); + } // Submit a barrier with a cache flushes. - dispatchBarrierPacket(kBarrierPacketHeader); - // Reset this flag since we already enable system scope for kBarrierPacketHeader - hasPendingDispatch_ = false; + dispatchBarrierPacket(kBarrierPacketHeader, false, prof_signal); + + // Don't reset the flag for direct dispatch, because the global signals are out of scope + // for internal barrier tracking and SDMA could lose a wait for compute + hasPendingDispatch_ = AMD_DIRECT_DISPATCH; } profilingEnd(vcmd); } @@ -2958,45 +2973,13 @@ void VirtualGPU::submitReleaseExtObjects(amd::ReleaseExtObjectsCommand& vcmd) { // ================================================================================================ void VirtualGPU::flush(amd::Command* list, bool wait) { - // Direct dispatch relies on HSA signal callback - bool skip_cpu_wait = AMD_DIRECT_DISPATCH; - - if (skip_cpu_wait) { - // Search for the last command in the batch to track GPU state - amd::Command* current = list; - assert(current != nullptr && "Empty batch for processing!"); - - // Find the last command - while (current->getNext() != nullptr) { - current = current->getNext(); - } - // Always insert a barrier. 
Some tests rquire async SDMA wait - hasPendingDispatch_ = true; - // Enable profiling, so runtime can track TS - profilingBegin(*current); - - // If runtime didn't submit a barrier, then it can't track the completion of the batch. - // Hence runtime either has to insert a barrier unconditionally or have a CPU wait. - // Due to performance impact of extra barriers CPU wait is selected. - // Note: if callback will be selected to update the batch status, - // then the host thread can't update it also, otherwise double free may occur - skip_cpu_wait &= hasPendingDispatch_; - - releaseGpuMemoryFence(skip_cpu_wait); - profilingEnd(*current); - } else { - // If barrier is requested, then wait for everything, otherwise - // a per disaptch wait will occur later in updateCommandsState() - releaseGpuMemoryFence(); - } - - // If CPU waited for GPU, then the queue is idle - if (!skip_cpu_wait) { - updateCommandsState(list); + // If barrier is requested, then wait for everything, otherwise + // a per disaptch wait will occur later in updateCommandsState() + releaseGpuMemoryFence(); + updateCommandsState(list); - // Release all pinned memory - releasePinnedMem(); - } + // Release all pinned memory + releasePinnedMem(); } // ================================================================================================ diff --git a/device/rocm/rocvirtual.hpp b/device/rocm/rocvirtual.hpp index aaa05acf6..64f556e41 100644 --- a/device/rocm/rocvirtual.hpp +++ b/device/rocm/rocvirtual.hpp @@ -34,23 +34,9 @@ namespace roc { class Device; class Memory; +struct ProfilingSignal; class Timestamp; -struct ProfilingSignal : public amd::HeapObject { - amd::Monitor lock_; //!< Signal lock for update - hsa_signal_t signal_; //!< HSA signal to track profiling information - Timestamp* ts_; //!< Timestamp object associated with the signal - HwQueueEngine engine_; //!< Engine used with this signal - bool done_; //!< True if signal is done - ProfilingSignal() - : lock_("Signal Ops Lock", true) - , 
ts_(nullptr) - , engine_(HwQueueEngine::Compute) - , done_(true) - { signal_.handle = 0; } - amd::Monitor& LockSignalOps() { return lock_; } -}; - // Initial HSA signal value constexpr static hsa_signal_value_t kInitSignalValueOne = 1; @@ -139,7 +125,7 @@ class Timestamp : public amd::HeapObject { const bool HwProfiling() const { return !signals_.empty(); } //! Finds execution ticks on GPU - void checkGpuTime(bool event_recycle = false); + void checkGpuTime(); // Start a timestamp (get timestamp from OS) void start() { start_ = amd::Os::timeNanos(); } @@ -225,8 +211,6 @@ class VirtualGPU : public device::VirtualDevice { //! Wait for the curent active signal. Can idle the queue bool WaitCurrent() { ProfilingSignal* signal = signal_list_[current_id_]; - ClPrint(amd::LOG_DEBUG, amd::LOG_MISC, "[%zx]!\t WaitCurret completion_signal=0x%zx", - std::this_thread::get_id(), signal->signal_.handle); return CpuWaitForSignal(signal); } @@ -253,8 +237,6 @@ class VirtualGPU : public device::VirtualDevice { void WaitNext() { size_t next = (current_id_ + 1) % signal_list_.size(); ProfilingSignal* signal = signal_list_[next]; - ClPrint(amd::LOG_DEBUG, amd::LOG_MISC, "[%zx]!\t WaitNext completion_signal=0x%zx", - std::this_thread::get_id(), signal->signal_.handle); CpuWaitForSignal(signal); } @@ -396,7 +378,8 @@ class VirtualGPU : public device::VirtualDevice { template bool dispatchGenericAqlPacket(AqlPacket* packet, uint16_t header, uint16_t rest, bool blocking, size_t size = 1); - void dispatchBarrierPacket(uint16_t packetHeader, bool skipSignal = false); + void dispatchBarrierPacket(uint16_t packetHeader, bool skipSignal = false, + const ProfilingSignal* global_signal = nullptr); bool dispatchCounterAqlPacket(hsa_ext_amd_aql_pm4_packet_t* packet, const uint32_t gfxVersion, bool blocking, const hsa_ven_amd_aqlprofile_1_00_pfn_t* extApi); void dispatchBarrierValuePacket(const hsa_amd_barrier_value_packet_t* packet, diff --git a/platform/command.cpp b/platform/command.cpp index 
082deaacc..19c29cce6 100644 --- a/platform/command.cpp +++ b/platform/command.cpp @@ -48,6 +48,7 @@ Event::Event(HostQueue& queue) status_(CL_INT_MAX), hw_event_(nullptr), notify_event_(nullptr), + device_(&queue.device()), profilingInfo_(IS_PROFILER_ON || queue.properties().test(CL_QUEUE_PROFILING_ENABLE) || Agent::shouldPostEventEvents()) { notified_.clear(); @@ -55,7 +56,7 @@ Event::Event(HostQueue& queue) // ================================================================================================ Event::Event() : callbacks_(NULL), status_(CL_SUBMITTED), - hw_event_(nullptr), notify_event_(nullptr) { notified_.clear(); } + hw_event_(nullptr), notify_event_(nullptr), device_(nullptr) { notified_.clear(); } // ================================================================================================ Event::~Event() { @@ -69,6 +70,10 @@ Event::~Event() { if (notify_event_ != nullptr) { notify_event_->release(); } + // Destroy global HW event if available + if ((hw_event_ != nullptr) && (device_ != nullptr)) { + device_->ReleaseGlobalSignal(hw_event_); + } } // ================================================================================================ @@ -259,21 +264,35 @@ bool Event::awaitCompletion() { // ================================================================================================ bool Event::notifyCmdQueue() { HostQueue* queue = command().queue(); - if ((status() > CL_COMPLETE) && (nullptr != queue) && - (!AMD_DIRECT_DISPATCH || - // If HW event was assigned, then notification can be ignored, since a barrier was issued - (HwEvent() == nullptr)) && - !notified_.test_and_set()) { - // Make sure the queue is draining the enqueued commands. 
- amd::Command* command = new amd::Marker(*queue, false, nullWaitList, this); - if (command == NULL) { - notified_.clear(); - return false; + if (AMD_DIRECT_DISPATCH) { + ScopedLock l(lock_); + if ((status() > CL_COMPLETE) && (nullptr != queue) && + // If HW event was assigned, then notification can be ignored, since a barrier was issued + (HwEvent() == nullptr) && + !notified_.test_and_set()) { + // Make sure the queue is draining the enqueued commands. + amd::Command* command = new amd::Marker(*queue, false, nullWaitList, this); + if (command == NULL) { + notified_.clear(); + return false; + } + ClPrint(LOG_DEBUG, LOG_CMD, "queue marker to command queue: %p", queue); + command->enqueue(); + // Save notification, associated with the current event + notify_event_ = command; + } + } else { + if ((status() > CL_COMPLETE) && (nullptr != queue) && !notified_.test_and_set()) { + // Make sure the queue is draining the enqueued commands. + amd::Command* command = new amd::Marker(*queue, false, nullWaitList, this); + if (command == NULL) { + notified_.clear(); + return false; + } + ClPrint(LOG_DEBUG, LOG_CMD, "queue marker to command queue: %p", queue); + command->enqueue(); + command->release(); } - ClPrint(LOG_DEBUG, LOG_CMD, "queue marker to command queue: %p", queue); - command->enqueue(); - // Save notification, associated with the current event - notify_event_ = command; } return true; } @@ -318,6 +337,7 @@ void Command::enqueue() { // update will occur later after flush() with a wait if (AMD_DIRECT_DISPATCH) { setStatus(CL_QUEUED); + // Notify all commands about the waiter. Barrier will be sent in order to obtain // HSA signal for a wait on the current queue std::for_each(eventWaitList().begin(), eventWaitList().end(), @@ -333,13 +353,10 @@ void Command::enqueue() { // Update batch head for the current marker. 
Hence the status of all commands can be // updated upon the marker completion SetBatchHead(queue_->GetSubmittionBatch()); - if (profilingInfo().marker_ts_) { - setStatus(CL_SUBMITTED); - submit(*queue_->vdev()); - } else { - // Flush the current batch, but skip the wait on CPU if possible to avoid a stall - queue_->vdev()->flush(queue_->GetSubmittionBatch()); - } + + setStatus(CL_SUBMITTED); + submit(*queue_->vdev()); + // The batch will be tracked with the marker now queue_->ResetSubmissionBatch(); } else { diff --git a/platform/command.hpp b/platform/command.hpp index ec9843a1b..02945b19a 100644 --- a/platform/command.hpp +++ b/platform/command.hpp @@ -95,6 +95,7 @@ class Event : public RuntimeObject { std::atomic_flag notified_; //!< Command queue was notified void* hw_event_; //!< HW event ID associated with SW event Event* notify_event_; //!< Notify event, which should contain HW signal + const Device* device_; //!< Device, this event associated with protected: static const EventWaitList nullWaitList; From 3a2bbad571dc0d66f30ab03e61b0cc63d2992cbc Mon Sep 17 00:00:00 2001 From: Chauncey Hui Date: Tue, 22 Jun 2021 03:00:06 -0400 Subject: [PATCH 038/102] SWDEV-2 - Change OpenCL version number from 3327 to 3328 --- utils/versions.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/versions.hpp b/utils/versions.hpp index 3b6cbf4e2..a27ea1522 100644 --- a/utils/versions.hpp +++ b/utils/versions.hpp @@ -28,7 +28,7 @@ #endif // AMD_PLATFORM_NAME #ifndef AMD_PLATFORM_BUILD_NUMBER -#define AMD_PLATFORM_BUILD_NUMBER 3327 +#define AMD_PLATFORM_BUILD_NUMBER 3328 #endif // AMD_PLATFORM_BUILD_NUMBER #ifndef AMD_PLATFORM_REVISION_NUMBER From 83309d8105df8dbb059ea00e1f3ada46f8a74411 Mon Sep 17 00:00:00 2001 From: Anusha GodavarthySurya Date: Thu, 17 Jun 2021 08:01:34 -0700 Subject: [PATCH 039/102] SWDEV-290901 - update ROC_ACTIVE_WAIT_TIMEOUT to 50us Change-Id: Iba2f2bb882c4786a432a523cb0954761e5359e7f --- utils/flags.hpp | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/utils/flags.hpp b/utils/flags.hpp index a9d7edf00..d7a338203 100644 --- a/utils/flags.hpp +++ b/utils/flags.hpp @@ -233,7 +233,7 @@ release(uint, HIP_HIDDEN_FREE_MEM, 0, \ "0 = Disable") \ release(size_t, GPU_FORCE_BLIT_COPY_SIZE, 0, \ "Size in KB of the threshold below which to force blit instead for sdma") \ -release(uint, ROC_ACTIVE_WAIT_TIMEOUT, 750, \ +release(uint, ROC_ACTIVE_WAIT_TIMEOUT, 50, \ "Forces active wait of GPU interrup for the timeout(us)") \ release(bool, ROC_ENABLE_LARGE_BAR, true, \ "Enable Large Bar if supported by the device") \ From 97c20f438111a368d977bd9ccf1b42033d3318f6 Mon Sep 17 00:00:00 2001 From: Jason Tang Date: Sun, 20 Jun 2021 14:24:24 -0400 Subject: [PATCH 040/102] SWDEV-287088 - Workaround VM page fault on Windows Some chunk memory are not guaranteed to be resident during initial allocation. Use CPDMA to force resident. Change-Id: If1a2da3e75f136caaa4c7a29d8f604d6af2639fa --- device/pal/palresource.cpp | 24 ++++++++++++++++++++++++ device/pal/palresource.hpp | 1 + 2 files changed, 25 insertions(+) diff --git a/device/pal/palresource.cpp b/device/pal/palresource.cpp index 2950e8a01..99e8d1c34 100644 --- a/device/pal/palresource.cpp +++ b/device/pal/palresource.cpp @@ -1929,6 +1929,22 @@ bool MemorySubAllocator::InitAllocator(GpuMemoryReference* mem_ref) { return true; } +// ================================================================================================ +void MemorySubAllocator::forceResident(GpuMemoryReference* mem_ref) { + if (IS_WINDOWS) { + // Write one DWORD using CPDMA to force resident + GpuEvent event; + auto gpu = device_->xferQueue(); + uint32_t data = 0; + + gpu->eventBegin(MainEngine); + gpu->queue(MainEngine).addCmdMemRef(mem_ref); + gpu->iCmd()->CmdUpdateMemory(*mem_ref->iMem(), 0, 4, &data); + gpu->eventEnd(MainEngine, event); + gpu->waitForEvent(&event); + } +} + // 
================================================================================================ bool MemorySubAllocator::CreateChunk(const Pal::IGpuMemory* reserved_va) { Pal::GpuMemoryCreateInfo createInfo = {}; @@ -1944,6 +1960,8 @@ bool MemorySubAllocator::CreateChunk(const Pal::IGpuMemory* reserved_va) { createInfo.mallPolicy = static_cast(device_->settings().mallPolicy_); GpuMemoryReference* mem_ref = GpuMemoryReference::Create(*device_, createInfo); if (mem_ref != nullptr) { + // Workaround: some chunk memory are not guaranteed to be resident during initial allocation. + forceResident(mem_ref); return InitAllocator(mem_ref); } return false; @@ -1964,6 +1982,8 @@ bool CoarseMemorySubAllocator::CreateChunk(const Pal::IGpuMemory* reserved_va) { createInfo.mallPolicy = static_cast(device_->settings().mallPolicy_); GpuMemoryReference* mem_ref = GpuMemoryReference::Create(*device_, createInfo); if (mem_ref != nullptr) { + // Workaround: some chunk memory are not guaranteed to be resident during initial allocation. + forceResident(mem_ref); return InitAllocator(mem_ref); } return false; @@ -1980,6 +2000,8 @@ bool FineMemorySubAllocator::CreateChunk(const Pal::IGpuMemory* reserved_va) { createInfo.mallPolicy = Pal::GpuMemMallPolicy::Never; GpuMemoryReference* mem_ref = GpuMemoryReference::Create(*device_, createInfo); if ((mem_ref != nullptr) && InitAllocator(mem_ref)) { + // Workaround: some chunk memory are not guaranteed to be resident during initial allocation. + forceResident(mem_ref); mem_ref->iMem()->Map(&mem_ref->cpuAddress_); return mem_ref->cpuAddress_ != nullptr; } @@ -1998,6 +2020,8 @@ bool FineUncachedMemorySubAllocator::CreateChunk(const Pal::IGpuMemory* reserved createInfo.mallPolicy = Pal::GpuMemMallPolicy::Never; GpuMemoryReference* mem_ref = GpuMemoryReference::Create(*device_, createInfo); if ((mem_ref != nullptr) && InitAllocator(mem_ref)) { + // Workaround: some chunk memory are not guaranteed to be resident during initial allocation. 
+ forceResident(mem_ref); mem_ref->iMem()->Map(&mem_ref->cpuAddress_); return mem_ref->cpuAddress_ != nullptr; } diff --git a/device/pal/palresource.hpp b/device/pal/palresource.hpp index ec11a1a24..bf3cf0196 100644 --- a/device/pal/palresource.hpp +++ b/device/pal/palresource.hpp @@ -528,6 +528,7 @@ class MemorySubAllocator : public amd::HeapObject { //! Allocate new chunk of memory virtual bool CreateChunk(const Pal::IGpuMemory* reserved_va); bool InitAllocator(GpuMemoryReference* mem_ref); + void forceResident(GpuMemoryReference* mem_ref); Device* device_; std::unordered_map heaps_; From 71ed43e41de2e859e9c2b3c33ee84d53de7a6520 Mon Sep 17 00:00:00 2001 From: Chauncey Hui Date: Wed, 23 Jun 2021 03:00:06 -0400 Subject: [PATCH 041/102] SWDEV-2 - Change OpenCL version number from 3328 to 3329 --- utils/versions.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/versions.hpp b/utils/versions.hpp index a27ea1522..5a2635308 100644 --- a/utils/versions.hpp +++ b/utils/versions.hpp @@ -28,7 +28,7 @@ #endif // AMD_PLATFORM_NAME #ifndef AMD_PLATFORM_BUILD_NUMBER -#define AMD_PLATFORM_BUILD_NUMBER 3328 +#define AMD_PLATFORM_BUILD_NUMBER 3329 #endif // AMD_PLATFORM_BUILD_NUMBER #ifndef AMD_PLATFORM_REVISION_NUMBER From 00ea40dad1caa6c7679177f5cbee629f2eae2335 Mon Sep 17 00:00:00 2001 From: Vladislav Sytchenko Date: Thu, 10 Jun 2021 16:00:15 -0400 Subject: [PATCH 042/102] SWDEV-274815 - [PAL] Navi24 support Change-Id: I934797bda471618c3f69484a1552b37345ae638b --- device/device.cpp | 4 ++-- device/pal/paldevice.cpp | 1 + device/pal/palsettings.cpp | 1 + 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/device/device.cpp b/device/device.cpp index bec70e614..07dbbb49c 100644 --- a/device/device.cpp +++ b/device/device.cpp @@ -185,8 +185,8 @@ std::pair Isa::supportedIsas() { {"gfx1030", "gfx1030", true, true, false, 10, 3, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32}, {"gfx1031", "gfx1031", true, true, false, 10, 3, 1, NONE, NONE, 2, 32, 1, 256, 
64 * Ki, 32}, {"gfx1032", "gfx1032", true, true, false, 10, 3, 2, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32}, - {"gfx1033", "gfx1033", true, false, false, 10, 3, 3, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32}, - {"gfx1034", "gfx1034", true, false, false, 10, 3, 4, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32} + {"gfx1033", "gfx1033", true, false, false, 10, 3, 3, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32}, + {"gfx1034", "gfx1034", true, true, false, 10, 3, 4, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32}, }; return std::make_pair(std::begin(supportedIsas_), std::end(supportedIsas_)); } diff --git a/device/pal/paldevice.cpp b/device/pal/paldevice.cpp index dbb73c0fb..f2d7e7af2 100644 --- a/device/pal/paldevice.cpp +++ b/device/pal/paldevice.cpp @@ -101,6 +101,7 @@ static constexpr PalDevice supportedPalDevices[] = { {10, 3, 0, Pal::GfxIpLevel::GfxIp10_3, "gfx1030", Pal::AsicRevision::Navi21}, {10, 3, 1, Pal::GfxIpLevel::GfxIp10_3, "gfx1031", Pal::AsicRevision::Navi22}, {10, 3, 2, Pal::GfxIpLevel::GfxIp10_3, "gfx1032", Pal::AsicRevision::Navi23}, + {10, 3, 4, Pal::GfxIpLevel::GfxIp10_3, "gfx1034", Pal::AsicRevision::Navi24}, }; static std::tuple findIsa(Pal::AsicRevision asicRevision, diff --git a/device/pal/palsettings.cpp b/device/pal/palsettings.cpp index 5bb658529..315b63eb6 100644 --- a/device/pal/palsettings.cpp +++ b/device/pal/palsettings.cpp @@ -202,6 +202,7 @@ bool Settings::create(const Pal::DeviceProperties& palProp, case Pal::AsicRevision::Navi12: case Pal::AsicRevision::Navi10: case Pal::AsicRevision::Navi10_A0: + case Pal::AsicRevision::Navi24: case Pal::AsicRevision::Navi23: case Pal::AsicRevision::Navi22: case Pal::AsicRevision::Navi21: From 7c8b93bc2466a3e568201376b5ef534ca30ea07d Mon Sep 17 00:00:00 2001 From: German Andryeyev Date: Wed, 23 Jun 2021 11:41:52 -0400 Subject: [PATCH 043/102] SWDEV-292018 - Avoid marker if queue is empty Change-Id: I40a42d67d2c911d2c9a0bf425f36bc795f9539c0 --- platform/commandqueue.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git 
a/platform/commandqueue.cpp b/platform/commandqueue.cpp index ea80a82de..4f552e4ab 100644 --- a/platform/commandqueue.cpp +++ b/platform/commandqueue.cpp @@ -107,6 +107,10 @@ void HostQueue::finish() { Command* command = nullptr; if (IS_HIP) { command = getLastQueuedCommand(true); + // Check if the queue has nothing to process and return + if (command == nullptr) { + return; + } } if (nullptr == command) { // Send a finish to make sure we finished all commands From 73f0002b8a1341801c8659370c72096152057aff Mon Sep 17 00:00:00 2001 From: Chauncey Hui Date: Fri, 25 Jun 2021 03:00:06 -0400 Subject: [PATCH 044/102] SWDEV-2 - Change OpenCL version number from 3329 to 3330 --- utils/versions.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/versions.hpp b/utils/versions.hpp index 5a2635308..0f4355121 100644 --- a/utils/versions.hpp +++ b/utils/versions.hpp @@ -28,7 +28,7 @@ #endif // AMD_PLATFORM_NAME #ifndef AMD_PLATFORM_BUILD_NUMBER -#define AMD_PLATFORM_BUILD_NUMBER 3329 +#define AMD_PLATFORM_BUILD_NUMBER 3330 #endif // AMD_PLATFORM_BUILD_NUMBER #ifndef AMD_PLATFORM_REVISION_NUMBER From f259dd5fe89a940ec3955d9145a4e5d29e524926 Mon Sep 17 00:00:00 2001 From: pvellien Date: Fri, 7 May 2021 13:46:52 +0000 Subject: [PATCH 045/102] SWDEV-285333 - Introduce Address sanitizer hostcall service Change-Id: Id29aacd09d0a9934a027446c57c7095804e1a454 --- cmake/ROCclrHSA.cmake | 3 +- device/devhcmessages.hpp | 7 +- device/devhostcall.cpp | 131 ++++++++---------------- device/devhostcall.hpp | 98 +++++++++++++++++- device/device.hpp | 11 ++ device/devsanitizer.hpp | 110 ++++++++++++++++++++ device/devurilocator.hpp | 43 ++++++++ device/gpu/gpudevice.hpp | 20 ++++ device/pal/paldevice.hpp | 21 +++- device/rocm/rocdevice.cpp | 13 +++ device/rocm/rocdevice.hpp | 14 +++ device/rocm/rocurilocator.cpp | 183 ++++++++++++++++++++++++++++++++++ device/rocm/rocurilocator.hpp | 48 +++++++++ 13 files changed, 609 insertions(+), 93 deletions(-) create mode 100644 
device/devsanitizer.hpp create mode 100644 device/devurilocator.hpp create mode 100644 device/rocm/rocurilocator.cpp create mode 100644 device/rocm/rocurilocator.hpp diff --git a/cmake/ROCclrHSA.cmake b/cmake/ROCclrHSA.cmake index 87cb55e4c..bb0ddc0c1 100644 --- a/cmake/ROCclrHSA.cmake +++ b/cmake/ROCclrHSA.cmake @@ -46,6 +46,7 @@ target_sources(rocclr PRIVATE ${ROCCLR_SRC_DIR}/device/rocm/rocschedcl.cpp ${ROCCLR_SRC_DIR}/device/rocm/rocsettings.cpp ${ROCCLR_SRC_DIR}/device/rocm/rocsignal.cpp - ${ROCCLR_SRC_DIR}/device/rocm/rocvirtual.cpp) + ${ROCCLR_SRC_DIR}/device/rocm/rocvirtual.cpp + ${ROCCLR_SRC_DIR}/device/rocm/rocurilocator.cpp) target_compile_definitions(rocclr PUBLIC WITH_HSA_DEVICE) diff --git a/device/devhcmessages.hpp b/device/devhcmessages.hpp index 7f156decd..4d0f877a9 100644 --- a/device/devhcmessages.hpp +++ b/device/devhcmessages.hpp @@ -70,7 +70,12 @@ enum ServiceID { SERVICE_RESERVED = 0, SERVICE_FUNCTION_CALL = 1, SERVICE_PRINTF = 2, - SERVICE_DEVMEM = 3, + SERVICE_DEVMEM = 3 + #if defined(__clang__) + #if __has_feature(address_sanitizer) + , SERVICE_SANITIZER = 4 + #endif + #endif }; struct Message; diff --git a/device/devhostcall.cpp b/device/devhostcall.cpp index a1bb1b6f7..5b7517b22 100644 --- a/device/devhostcall.cpp +++ b/device/devhostcall.cpp @@ -36,91 +36,11 @@ #include #include -namespace { // anonymous - -enum SignalValue { SIGNAL_DONE = 0, SIGNAL_INIT = 1 }; - -/** \brief Packet payload - * - * Contains 64 slots of 8 ulongs each, one for each workitem in the - * wave. A slot with index \c i contains valid data if the - * corresponding bit in PacketHeader::activemask is set. - */ -struct Payload { - uint64_t slots[64][8]; -}; - -/** Packet header */ -struct PacketHeader { - /** Tagged pointer to the next packet in an intrusive stack */ - uint64_t next_; - /** Bitmask that represents payload slots with valid data */ - uint64_t activemask_; - /** Service ID requested by the wave */ - uint32_t service_; - /** Control bits. 
- * \li 0: \c READY flag. Indicates packet awaiting a host response. - */ - std::atomic control_; -}; - -static_assert(std::is_standard_layout::value, - "the hostcall packet must be useable from other languages"); - -/** Field offsets in the packet control field */ -enum ControlOffset { - CONTROL_OFFSET_READY_FLAG = 0, - CONTROL_OFFSET_RESERVED0 = 1, -}; - -/** Field widths in the packet control field */ -enum ControlWidth { - CONTROL_WIDTH_READY_FLAG = 1, - CONTROL_WIDTH_RESERVED0 = 31, -}; - -/** \brief Shared buffer submitting hostcall requests. - * - * Holds hostcall packets requested by all kernels executing on the - * same device queue. Each hostcall buffer is associated with at most - * one device queue. - * - * Packets in the buffer are accessed using 64-bit tagged pointers to mitigate - * the ABA problem in lock-free stacks. The index_mask is used to extract the - * lower bits of the pointer, which form the index into the packet array. The - * remaining higher bits define a tag that is incremented on every pop from a - * stack. - */ -class HostcallBuffer { - /** Array of packet headers */ - PacketHeader* headers_; - /** Array of packet payloads */ - Payload* payloads_; - /** Signal used by kernels to indicate new work */ - void* doorbell_; - /** Stack of free packets. Uses tagged pointers. */ - uint64_t free_stack_; - /** Stack of ready packets. Uses tagged pointers */ - std::atomic ready_stack_; - /** Mask for accessing the packet index in the tagged pointer. 
*/ - uint64_t index_mask_; - /** Some services need a device */ - const amd::Device* device_; - - PacketHeader* getHeader(uint64_t ptr) const; - Payload* getPayload(uint64_t ptr) const; - - public: - void processPackets(MessageHandler& messages); - void initialize(uint32_t num_packets); - void setDoorbell(void* doorbell) { doorbell_ = doorbell; }; - void setDevice(const amd::Device* dptr) { device_ = dptr; } -}; - -static_assert(std::is_standard_layout::value, - "the hostcall buffer must be useable from other languages"); - -}; // namespace +#if defined(__clang__) +#if __has_feature(address_sanitizer) +#include "device/devsanitizer.hpp" +#endif +#endif PacketHeader* HostcallBuffer::getHeader(uint64_t ptr) const { return headers_ + (ptr & index_mask_); @@ -205,6 +125,7 @@ void HostcallBuffer::processPackets(MessageHandler& messages) { // Grab the entire ready stack and set the top to 0. New requests from the // device will continue pushing on the stack while we process the packets that // we have grabbed. + uint64_t ready_stack = std::atomic_exchange_explicit(&ready_stack_, static_cast(0), std::memory_order_acquire); if (!ready_stack) { return; @@ -222,6 +143,16 @@ void HostcallBuffer::processPackets(MessageHandler& messages) { auto service = header->service_; auto payload = getPayload(iter); auto activemask = header->activemask_; + +#if defined(__clang__) +#if __has_feature(address_sanitizer) + if (service == SERVICE_SANITIZER) { + handleSanitizerService(payload, activemask, device_, uri_locator); + //activemask zeroed to avoid subsequent handling for each work-item. 
+ activemask = 0; + } +#endif +#endif while (activemask) { auto wi = amd::leastBitSet(activemask); activemask ^= static_cast(1) << wi; @@ -290,7 +221,11 @@ class HostcallListener { std::set buffers_; device::Signal* doorbell_; MessageHandler messages_; - +#if defined(__clang__) +#if __has_feature(address_sanitizer) + device::UriLocator* urilocator = nullptr; +#endif +#endif class Thread : public amd::Thread { public: Thread() : amd::Thread("Hostcall Listener Thread", CQ_THREAD_STACK_SIZE) {} @@ -338,7 +273,6 @@ amd::Monitor listenerLock("Hostcall listener lock"); void HostcallListener::consumePackets() { uint64_t timeout = 1024 * 1024; uint64_t signal_value = SIGNAL_INIT; - while (true) { while (true) { uint64_t new_value = doorbell_->Wait(signal_value, device::Signal::Condition::Ne, timeout); @@ -349,7 +283,6 @@ void HostcallListener::consumePackets() { } if (signal_value == SIGNAL_DONE) { - ClPrint(amd::LOG_INFO, amd::LOG_INIT, "Hostcall listener received SIGNAL_DONE"); return; } @@ -375,12 +308,23 @@ void HostcallListener::terminate() { amd::Os::yield(); } +#if defined(__clang__) +#if __has_feature(address_sanitizer) + if (urilocator) + delete urilocator; +#endif +#endif delete doorbell_; } void HostcallListener::addBuffer(HostcallBuffer* buffer) { assert(buffers_.count(buffer) == 0 && "buffer already present"); buffer->setDoorbell(doorbell_->getHandle()); +#if defined(__clang__) +#if __has_feature(address_sanitizer) + buffer->setUriLocator(urilocator); +#endif +#endif buffers_.insert(buffer); } @@ -400,10 +344,21 @@ bool HostcallListener::initialize(const amd::Device &dev) { return false; } +#if defined(__clang__) +#if __has_feature(address_sanitizer) + urilocator = dev.createUriLocator(); +#endif +#endif // If the listener thread was not successfully initialized, clean // everything up and bail out. 
if (thread_.state() < Thread::INITIALIZED) { delete doorbell_; +#if defined(__clang__) +#if __has_feature(address_sanitizer) + if (urilocator) + delete urilocator; +#endif +#endif return false; } diff --git a/device/devhostcall.hpp b/device/devhostcall.hpp index 1a3f275a9..ae2047c33 100644 --- a/device/devhostcall.hpp +++ b/device/devhostcall.hpp @@ -22,9 +22,14 @@ #include "top.hpp" #include "device/device.hpp" - +#include "device/devhcmessages.hpp" #include +#if defined(__clang__) +#if __has_feature(address_sanitizer) +#include "device/devurilocator.hpp" +#endif +#endif /** \file Support for invoking host services from the device. * * A hostcall is a fixed-size request generated by a kernel running @@ -79,3 +84,94 @@ uint32_t getHostcallBufferAlignment(void); bool enableHostcalls(const amd::Device& dev, void* buffer, uint32_t numPackets); void disableHostcalls(void* buffer); + +enum SignalValue { SIGNAL_DONE = 0, SIGNAL_INIT = 1 }; + +/** \brief Packet payload + * + * Contains 64 slots of 8 ulongs each, one for each workitem in the + * wave. A slot with index \c i contains valid data if the + * corresponding bit in PacketHeader::activemask is set. + */ +struct Payload { + uint64_t slots[64][8]; +}; + +/** Packet header */ +struct PacketHeader { + /** Tagged pointer to the next packet in an intrusive stack */ + uint64_t next_; + /** Bitmask that represents payload slots with valid data */ + uint64_t activemask_; + /** Service ID requested by the wave */ + uint32_t service_; + /** Control bits. + * \li 0: \c READY flag. Indicates packet awaiting a host response. 
+ */ + std::atomic control_; +}; + +static_assert(std::is_standard_layout::value, + "the hostcall packet must be useable from other languages"); + +/** Field offsets in the packet control field */ +enum ControlOffset { + CONTROL_OFFSET_READY_FLAG = 0, + CONTROL_OFFSET_RESERVED0 = 1, +}; + +/** Field widths in the packet control field */ +enum ControlWidth { + CONTROL_WIDTH_READY_FLAG = 1, + CONTROL_WIDTH_RESERVED0 = 31, +}; + +/** \brief Shared buffer submitting hostcall requests. + * + * Holds hostcall packets requested by all kernels executing on the + * same device queue. Each hostcall buffer is associated with at most + * one device queue. + * + * Packets in the buffer are accessed using 64-bit tagged pointers to mitigate + * the ABA problem in lock-free stacks. The index_mask is used to extract the + * lower bits of the pointer, which form the index into the packet array. The + * remaining higher bits define a tag that is incremented on every pop from a + * stack. + */ +class HostcallBuffer { + /** Array of packet headers */ + PacketHeader* headers_; + /** Array of packet payloads */ + Payload* payloads_; + /** Signal used by kernels to indicate new work */ + void* doorbell_; + /** Stack of free packets. Uses tagged pointers. */ + uint64_t free_stack_; + /** Stack of ready packets. Uses tagged pointers */ + std::atomic ready_stack_; + /** Mask for accessing the packet index in the tagged pointer. 
*/ + uint64_t index_mask_; + /** Some services need a device**/ + const amd::Device* device_; + + PacketHeader* getHeader(uint64_t ptr) const; + Payload* getPayload(uint64_t ptr) const; + + public: + void processPackets(MessageHandler& messages); + void initialize(uint32_t num_packets); + void setDoorbell(void* doorbell) { doorbell_ = doorbell; }; + void setDevice(const amd::Device* dptr) { device_ = dptr; }; + + #if defined(__clang__) + #if __has_feature(address_sanitizer) + private: + device::UriLocator* uri_locator; + public: + void setUriLocator(device::UriLocator* uri_l) { uri_locator = uri_l; }; + #endif + #endif +}; + +static_assert(std::is_standard_layout::value, + "the hostcall buffer must be useable from other languages"); diff --git a/device/device.hpp b/device/device.hpp index 7c0e13db1..69eb60a2d 100644 --- a/device/device.hpp +++ b/device/device.hpp @@ -40,6 +40,12 @@ #include "hwdebug.hpp" #include "devsignal.hpp" +#if defined(__clang__) +#if __has_feature(address_sanitizer) +#include "devurilocator.hpp" +#endif +#endif + #include #include #include @@ -1838,6 +1844,11 @@ class Device : public RuntimeObject { virtual amd::Memory* GetArenaMemObj(const void* ptr, size_t& offset) { return nullptr; } +#if defined(__clang__) +#if __has_feature(address_sanitizer) + virtual device::UriLocator* createUriLocator() const = 0; +#endif +#endif protected: //! Enable the specified extension diff --git a/device/devsanitizer.hpp b/device/devsanitizer.hpp new file mode 100644 index 000000000..5483b40ba --- /dev/null +++ b/device/devsanitizer.hpp @@ -0,0 +1,110 @@ +/* Copyright (c) 2021-present Advanced Micro Devices, Inc. 
+ + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. */ + +#pragma once +#include "device/devhostcall.hpp" +#include "device/device.hpp" +#include "device/devurilocator.hpp" +#include "utils/debug.hpp" +#include "platform/memory.hpp" + +#include //to exp +#include +#include +#include +#include + +//Address sanitizer runtime entry-function to report the invalid device memory access +//this will be defined in llvm-project/compiler-rt/lib/asan, and will have effect only +//when compiler-rt is build for AMDGPU. +//Note: This API is runtime interface of asan library and only defined for linux os. 
+extern "C" +void __asan_report_nonself_error(uint64_t *callstack, uint32_t n_callstack, uint64_t* addr, + uint32_t naddr, uint64_t* entity_ids, uint32_t n_entities, bool is_write, uint32_t access_size, + bool is_abort, const char* name, int64_t vma_adjust, int fd, + uint64_t file_extent_size, uint64_t file_extent_start = 0); + +namespace { +void handleSanitizerService(Payload* packt_payload, uint64_t activemask, + const amd::Device* gpu_device, device::UriLocator* uri_locator) { + // An address results in invalid access in each active lane + uint64_t device_failing_addresses[64]; + // An array of identifications of entities requesting a report. + // index 0 - contains device id + // index 1,2,3 - contains wg_idx, wg_idy, wg_idz respectively. + // index 4 to 67 - contains reporting wave ids in a wave-front. + uint64_t entity_id[68], callstack[1]; + uint32_t n_activelanes = __builtin_popcountl(activemask); + uint64_t access_info = 0, access_size = 0; + bool is_abort = true; + entity_id[0] = gpu_device->index(); + + assert(packt_payload != nullptr && "packet payload is null?"); + + int indx = 0, en_idx = 1; + bool first_workitem = false; + while (activemask) { + auto wi = amd::leastBitSet(activemask); + activemask ^= static_cast(1) << wi; + auto data_slot = packt_payload->slots[wi]; + //encoding of packet payload arguments is + //defined in device-libs/asanrtl/src/report.cl + if (!first_workitem) { + device_failing_addresses[indx] = data_slot[0]; + callstack[0] = data_slot[1]; + entity_id[en_idx] = data_slot[2]; + entity_id[++en_idx] = data_slot[3]; + entity_id[++en_idx] = data_slot[4]; + entity_id[++en_idx] = data_slot[5]; + access_info = data_slot[6]; + access_size = data_slot[7]; + first_workitem = true; + } + else { + device_failing_addresses[indx] = data_slot[0]; + entity_id[en_idx] = data_slot[5]; + } + indx++; + en_idx++; + } + + bool is_write = false; + if (access_info & 0xFFFFFFFF00000000) + is_abort = false; + if (access_info & 1) + is_write = true; + + 
std::string fileuri; + uint64_t size = 0, offset = 0; + int64_t loadAddrAdjust = 0; + auto uri_fd = amd::Os::FDescInit(); + if (uri_locator) { + device::UriLocator::UriInfo fileuri_info = uri_locator->lookUpUri(callstack[0]); + std::tie(offset, size) = uri_locator->decodeUriAndGetFd(fileuri_info, &uri_fd); + loadAddrAdjust = fileuri_info.loadAddressDiff; + } + +#if defined(__linux__) + __asan_report_nonself_error(callstack, 1, device_failing_addresses, n_activelanes, + entity_id, n_activelanes+4, is_write, access_size, is_abort, + /*thread key*/"amdgpu", loadAddrAdjust, uri_fd, size, offset); +#endif +} +} //end anonymous namespace diff --git a/device/devurilocator.hpp b/device/devurilocator.hpp new file mode 100644 index 000000000..dcb31b5de --- /dev/null +++ b/device/devurilocator.hpp @@ -0,0 +1,43 @@ +/* Copyright (c) 2019-present Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
*/ + +#pragma once +#if defined(__clang__) +#if __has_feature(address_sanitizer) +#include "os/os.hpp" +#include +#include +namespace device{ +// Interface for HSA/PAL Uri Locators +class UriLocator { + public: + struct UriInfo { + std::string uriPath; + int64_t loadAddressDiff; + }; + + virtual ~UriLocator() {} + virtual UriInfo lookUpUri(uint64_t device_pc) = 0; + virtual std::pair decodeUriAndGetFd(UriInfo& uri, + amd::Os::FileDesc* uri_fd) = 0; +}; +} //namespace device +#endif +#endif diff --git a/device/gpu/gpudevice.hpp b/device/gpu/gpudevice.hpp index 35a83c768..1ee64f52d 100644 --- a/device/gpu/gpudevice.hpp +++ b/device/gpu/gpudevice.hpp @@ -42,6 +42,12 @@ #include "hsailctx.hpp" #include "vaminterface.h" +#if defined(__clang__) +#if __has_feature(address_sanitizer) +#include "device/devurilocator.hpp" +#endif +#endif + /*! \addtogroup GPU * @{ */ @@ -140,6 +146,13 @@ class NullDevice : public amd::Device { virtual bool SetClockMode(const cl_set_device_clock_mode_input_amd setClockModeInput, cl_set_device_clock_mode_output_amd* pSetClockModeOutput) { return true; } +#if defined(__clang__) +#if __has_feature(address_sanitizer) + virtual device::UriLocator* createUriLocator() const { + return nullptr; + } +#endif +#endif protected: //! Answer the question: "Should HSAIL Program be created?", //! based on the given options. @@ -548,6 +561,13 @@ class Device : public NullDevice, public CALGSLDevice { //! Initial the Hardware Debug Manager int32_t hwDebugManagerInit(amd::Context* context, uintptr_t messageStorage); +#if defined(__clang__) +#if __has_feature(address_sanitizer) + virtual device::UriLocator* createUriLocator() const { + return nullptr; + } +#endif +#endif private: //! 
Disable copy constructor Device(const Device&); diff --git a/device/pal/paldevice.hpp b/device/pal/paldevice.hpp index 052577e72..6f19ebebe 100644 --- a/device/pal/paldevice.hpp +++ b/device/pal/paldevice.hpp @@ -43,6 +43,11 @@ #include #include +#if defined(__clang__) +#if __has_feature(address_sanitizer) +#include "device/devurilocator.hpp" +#endif +#endif /*! \addtogroup PAL * @{ */ @@ -145,7 +150,13 @@ class NullDevice : public amd::Device { cl_set_device_clock_mode_output_amd* pSetClockModeOutput) { return true; } - +#if defined(__clang__) +#if __has_feature(address_sanitizer) + virtual device::UriLocator* createUriLocator() const { + return nullptr; + } +#endif +#endif protected: static Util::GenericAllocator allocator_; //!< Generic memory allocator in PAL @@ -593,7 +604,13 @@ class Device : public NullDevice { virtual bool importExtSemaphore(void** extSemaphore, const amd::Os::FileDesc& handle); virtual void DestroyExtSemaphore(void* extSemaphore); - +#if defined(__clang__) +#if __has_feature(address_sanitizer) + virtual device::UriLocator* createUrilocator() const { + return nullptr; + } +#endif +#endif private: static void PAL_STDCALL PalDeveloperCallback(void* pPrivateData, const Pal::uint32 deviceIndex, Pal::Developer::CallbackType type, void* pCbData); diff --git a/device/rocm/rocdevice.cpp b/device/rocm/rocdevice.cpp index c6e94732a..0bb98ec59 100644 --- a/device/rocm/rocdevice.cpp +++ b/device/rocm/rocdevice.cpp @@ -45,6 +45,12 @@ #endif #include "platform/sampler.hpp" +#if defined(__clang__) +#if __has_feature(address_sanitizer) +#include "device/rocm/rocurilocator.hpp" +#endif +#endif + #include #include #include @@ -3026,5 +3032,12 @@ void Device::ReleaseGlobalSignal(void* signal) const { } } +#if defined(__clang__) +#if __has_feature(address_sanitizer) +device::UriLocator* Device::createUriLocator() const { + return new roc::UriLocator(); +} +#endif +#endif } // namespace roc #endif // WITHOUT_HSA_BACKEND diff --git a/device/rocm/rocdevice.hpp 
b/device/rocm/rocdevice.hpp index 0fb6f0b8e..b408a9d71 100644 --- a/device/rocm/rocdevice.hpp +++ b/device/rocm/rocdevice.hpp @@ -254,6 +254,15 @@ class NullDevice : public amd::Device { virtual bool IsHwEventReady(const amd::Event& event, bool wait = false) const { return false; } virtual void ReleaseGlobalSignal(void* signal) const {} +#if defined(__clang__) +#if __has_feature(address_sanitizer) + virtual device::UriLocator* createUriLocator() const { + ShouldNotReachHere(); + return nullptr; + } +#endif +#endif + protected: //! Initialize compiler instance and handle static bool initCompiler(bool isOffline); @@ -599,6 +608,11 @@ class Device : public NullDevice { //! enum for keeping the total and available queue priorities enum QueuePriority : uint { Low = 0, Normal = 1, High = 2, Total = 3}; +#if defined(__clang__) +#if __has_feature(address_sanitizer) + virtual device::UriLocator* createUriLocator() const; +#endif +#endif }; // class roc::Device } // namespace roc diff --git a/device/rocm/rocurilocator.cpp b/device/rocm/rocurilocator.cpp new file mode 100644 index 000000000..acc01e75c --- /dev/null +++ b/device/rocm/rocurilocator.cpp @@ -0,0 +1,183 @@ +/* Copyright (c) 2021-present Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. */ + +#if defined(__clang__) +#if __has_feature(address_sanitizer) +#include "rocurilocator.hpp" +#include + +namespace roc { +hsa_status_t UriLocator::createUriRangeTable() { + auto execCb = [] (hsa_executable_t exec, + void *data) -> hsa_status_t { + int execState = 0; + hsa_status_t status; + status = hsa_executable_get_info(exec, HSA_EXECUTABLE_INFO_STATE, &execState); + if (status != HSA_STATUS_SUCCESS) + return status; + if (execState != HSA_EXECUTABLE_STATE_FROZEN) + return status; + + auto loadedCodeObjectCb = [] (hsa_executable_t exec, + hsa_loaded_code_object_t lcobj, void *data) -> hsa_status_t { + hsa_status_t result; + uint64_t loadBAddr = 0, loadSize = 0; + uint32_t uriLen = 0; + int64_t delta = 0; + uint64_t *argsCb = static_cast(data); + hsa_ven_amd_loader_1_03_pfn_t *fnTab = + reinterpret_cast (argsCb[0]); + std::vector *rangeTab = + reinterpret_cast*> (argsCb[1]); + + if (!fnTab->hsa_ven_amd_loader_loaded_code_object_get_info) + return HSA_STATUS_ERROR; + + result = fnTab->hsa_ven_amd_loader_loaded_code_object_get_info(lcobj, + HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_BASE, (void*) &loadBAddr); + if (result != HSA_STATUS_SUCCESS) + return result; + + result = fnTab->hsa_ven_amd_loader_loaded_code_object_get_info(lcobj, + HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_SIZE, (void*) &loadSize); + if (result != HSA_STATUS_SUCCESS) + return result; + + result = fnTab->hsa_ven_amd_loader_loaded_code_object_get_info(lcobj, + 
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI_LENGTH, (void*) &uriLen); + if (result != HSA_STATUS_SUCCESS) + return result; + + result = fnTab-> hsa_ven_amd_loader_loaded_code_object_get_info(lcobj, + HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_DELTA, (void*) &delta); + if (result != HSA_STATUS_SUCCESS) + return result; + + char *uri = new char[uriLen+1]; + uri[uriLen] = '\0'; + result = fnTab->hsa_ven_amd_loader_loaded_code_object_get_info(lcobj, + HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI, (void*) uri); + if (result != HSA_STATUS_SUCCESS) + return result; + rangeTab->push_back(UriRange{loadBAddr, loadBAddr+loadSize-1, + delta, std::string{uri,uriLen+1}}); + delete[] uri; + return HSA_STATUS_SUCCESS; + }; + + uint64_t *args = static_cast(data); + hsa_ven_amd_loader_1_03_pfn_t *fnExtTab = + reinterpret_cast (args[0]); + return fnExtTab->hsa_ven_amd_loader_executable_iterate_loaded_code_objects(exec, + loadedCodeObjectCb, data); + }; + + if (!fn_table_.hsa_ven_amd_loader_iterate_executables) + return HSA_STATUS_ERROR; + + uint64_t callbackArgs[2] = {(uint64_t)& fn_table_, (uint64_t) &rangeTab_}; + return fn_table_.hsa_ven_amd_loader_iterate_executables(execCb, (void*) callbackArgs); +} + +// Encoding of uniform-resource-identifier(URI) is detailed in +// https://llvm.org/docs/AMDGPUUsage.html#loaded-code-object-path-uniform-resource-identifier-uri +std::pair UriLocator::decodeUriAndGetFd(UriInfo& uri, + amd::Os::FileDesc* uri_fd) { + std::ostringstream ss; + char cur; + uint64_t offset = 0, size = 0; + if (uri.uriPath.size() == 0) + return {0,0}; + auto pos = uri.uriPath.find("//"); + if (pos == std::string::npos || uri.uriPath.substr(0, pos) != "file:") { + uri.uriPath=""; + return {0,0}; + } + auto rspos = uri.uriPath.find('#'); + if (rspos != std::string::npos) { + //parse range specifier + std::string offprefix = "offset=", sizeprefix = "size="; + auto sbeg = uri.uriPath.find('&',rspos); + auto offbeg = rspos + offprefix.size()+1; + std::string offstr 
= uri.uriPath.substr(offbeg, sbeg - offbeg); + auto sizebeg = sbeg + sizeprefix.size()+1; + std::string sizestr = uri.uriPath.substr(sizebeg, uri.uriPath.size()-sizebeg); + offset = std::stoull(offstr, nullptr, 0); + size = std::stoull(sizestr, nullptr, 0); + rspos -= 1; + } + else { + rspos = uri.uriPath.size()-1; + } + pos += 2; + //decode filepath + for (auto i=pos; i<= rspos;) { + cur = uri.uriPath[i]; + if (isalnum(cur) || cur == '/' || cur == '-' || + cur == '_' || cur == '.' || cur == '~') { + ss << cur; + i++; + } + else { + //characters prefix with '%' char + char tbits = uri.uriPath[i+1], lbits = uri.uriPath[i+2]; + uint8_t t = (tbits < 58) ? ( tbits - 48) : ((tbits - 65) + 10); + uint8_t l = (lbits < 58) ? ( lbits - 48) : ((lbits - 65) + 10); + ss << (char)(((0b00000000 | t)<<4) | l); + i += 3; + } + } + uri.uriPath = ss.str(); + size_t fd_size; + (void) amd::Os::GetFileHandle(uri.uriPath.c_str(), uri_fd, &fd_size); + // As per URI locator syntax, range_specifier is optional + // if range_specifier is absent return total size of the file + // and set offset to begin at 0. 
+ if (size == 0) + size = fd_size; + return {offset, size}; +} + +UriLocator::UriInfo UriLocator::lookUpUri(uint64_t device_pc) { + UriInfo errorstate{"", 0}; + + if (!init_) { + hsa_status_t result; + result = hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_LOADER, 1, + sizeof(fn_table_), &fn_table_); + if (result != HSA_STATUS_SUCCESS) + return errorstate; + result = createUriRangeTable(); + if (result != HSA_STATUS_SUCCESS) { + rangeTab_.clear(); + return errorstate; + } + init_ = true; + } + + for(auto& seg : rangeTab_) + if (seg.startAddr_ <= device_pc && device_pc <= seg.endAddr_) + return UriInfo{seg.Uri_.c_str(), seg.elfDelta_}; + + return errorstate; +} +} //namespace roc +#endif +#endif diff --git a/device/rocm/rocurilocator.hpp b/device/rocm/rocurilocator.hpp new file mode 100644 index 000000000..8331b479c --- /dev/null +++ b/device/rocm/rocurilocator.hpp @@ -0,0 +1,48 @@ +/* Copyright (c) 2019-present Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. */ + +#pragma once +#if defined(__clang__) +#if __has_feature(address_sanitizer) +#include "device/devurilocator.hpp" +#include "hsa_ven_amd_loader.h" + +#include +namespace roc { +class UriLocator : public device::UriLocator { + bool init_ = false; + struct UriRange { + uint64_t startAddr_, endAddr_; + int64_t elfDelta_; + std::string Uri_; + }; + std::vector rangeTab_; + hsa_ven_amd_loader_1_03_pfn_t fn_table_; + + hsa_status_t createUriRangeTable(); + public: + virtual ~UriLocator() {} + virtual UriInfo lookUpUri(uint64_t device_pc) override; + virtual std::pair decodeUriAndGetFd(UriInfo& uri_path, + amd::Os::FileDesc* uri_fd) override; +}; +} +#endif +#endif From debd9290fe2044d374ac95f83821440097720847 Mon Sep 17 00:00:00 2001 From: Chauncey Hui Date: Tue, 29 Jun 2021 03:00:07 -0400 Subject: [PATCH 046/102] SWDEV-2 - Change OpenCL version number from 3330 to 3331 --- utils/versions.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/versions.hpp b/utils/versions.hpp index 0f4355121..077e61579 100644 --- a/utils/versions.hpp +++ b/utils/versions.hpp @@ -28,7 +28,7 @@ #endif // AMD_PLATFORM_NAME #ifndef AMD_PLATFORM_BUILD_NUMBER -#define AMD_PLATFORM_BUILD_NUMBER 3330 +#define AMD_PLATFORM_BUILD_NUMBER 3331 #endif // AMD_PLATFORM_BUILD_NUMBER #ifndef AMD_PLATFORM_REVISION_NUMBER From de621fd5efbcf61061685db6aac2ff6957f43e43 Mon Sep 17 00:00:00 2001 From: jujiang Date: Mon, 28 Jun 2021 13:42:52 -0400 Subject: [PATCH 047/102] SWDEV-286322 - Add .gitattribute Change-Id: I4ae4c4498765f10c98bb5f2443a083463b0e25af --- .gitattributes | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 .gitattributes diff --git a/.gitattributes b/.gitattributes new file mode 
100644 index 000000000..55bb72f28 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,21 @@ +# Set the default behavior, in case people don't have core.autolf set. +* text=auto + +# Explicitly declare text files you want to always be normalized and converted +# to have LF line endings on checkout. +*.c text eol=lf +*.cpp text eol=lf +*.cc text eol=lf +*.h text eol=lf +*.hpp text eol=lf +*.txt text eol=lf + +# Define files to support auto-remove trailing white space +# Need to run the command below, before add modified file(s) to the staging area +# git config filter.trimspace.clean 'sed -e "s/[[:space:]]*$//g"' +*.cpp filter=trimspace +*.c filter=trimspace +*.h filter=trimspacecpp +*.hpp filter=trimspace +*.md filter=trimspace + From 0c400c483aa742a04f04c4ff09ffa56642cd9671 Mon Sep 17 00:00:00 2001 From: Chauncey Hui Date: Wed, 30 Jun 2021 03:00:06 -0400 Subject: [PATCH 048/102] SWDEV-2 - Change OpenCL version number from 3331 to 3332 --- utils/versions.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/versions.hpp b/utils/versions.hpp index 077e61579..498c519e1 100644 --- a/utils/versions.hpp +++ b/utils/versions.hpp @@ -28,7 +28,7 @@ #endif // AMD_PLATFORM_NAME #ifndef AMD_PLATFORM_BUILD_NUMBER -#define AMD_PLATFORM_BUILD_NUMBER 3331 +#define AMD_PLATFORM_BUILD_NUMBER 3332 #endif // AMD_PLATFORM_BUILD_NUMBER #ifndef AMD_PLATFORM_REVISION_NUMBER From 5d71744b8f908a356ff56b45ed74cb06007905e2 Mon Sep 17 00:00:00 2001 From: German Andryeyev Date: Wed, 30 Jun 2021 14:36:10 -0400 Subject: [PATCH 049/102] SWDEV-289378 - Avoid CPU TS overwrite if GPU TS is available Change-Id: I2fc1a43c048b4fa1e42f27c4cf5c72fc7ae84e9c --- device/rocm/rocvirtual.hpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/device/rocm/rocvirtual.hpp b/device/rocm/rocvirtual.hpp index 64f556e41..fcf75b094 100644 --- a/device/rocm/rocvirtual.hpp +++ b/device/rocm/rocvirtual.hpp @@ -131,7 +131,14 @@ class Timestamp : public amd::HeapObject { void start() { 
start_ = amd::Os::timeNanos(); } // End a timestamp (get timestamp from OS) - void end() { end_ = amd::Os::timeNanos(); } + void end() { + // Timestamp value can be updated by HW profiling if current command had a stall. + // Although CPU TS should be still valid in this situation, there are cases in VM mode + // when CPU timeline is out of sync with GPU timeline and shifted time can be reported + if (end_ != 0) { + end_ = amd::Os::timeNanos(); + } + } static void setGpuTicksToTime(double ticksToTime) { ticksToTime_ = ticksToTime; } static double getGpuTicksToTime() { return ticksToTime_; } From 2df4bacc6f2e028736b41babfd4d2e89803e9b0c Mon Sep 17 00:00:00 2001 From: German Andryeyev Date: Tue, 6 Jul 2021 14:09:22 -0400 Subject: [PATCH 050/102] SWDEV-289378 - Avoid CPU TS overwrite if GPU TS is available Change-Id: I444ec284669b6a6fff31182e213b47c562c90192 --- device/rocm/rocvirtual.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/device/rocm/rocvirtual.hpp b/device/rocm/rocvirtual.hpp index fcf75b094..277289731 100644 --- a/device/rocm/rocvirtual.hpp +++ b/device/rocm/rocvirtual.hpp @@ -135,7 +135,7 @@ class Timestamp : public amd::HeapObject { // Timestamp value can be updated by HW profiling if current command had a stall. // Although CPU TS should be still valid in this situation, there are cases in VM mode // when CPU timeline is out of sync with GPU timeline and shifted time can be reported - if (end_ != 0) { + if (end_ == 0) { end_ = amd::Os::timeNanos(); } } From 65c1c12097f45b9e8cdf68bd018594fd08bb75a5 Mon Sep 17 00:00:00 2001 From: Saleel Kudchadker Date: Fri, 2 Jul 2021 09:05:33 -0700 Subject: [PATCH 051/102] SWDEV-260448 - Honor NUMACTL for Direct Dispatch Setting AMD_CPU_AFFINITY=1 will keep Async Handler thread within the bounds set by numactl. 
Change-Id: Id01b30df5127d65c29ac072bf74a04986b7128de --- device/rocm/rocvirtual.cpp | 3 ++- os/os.hpp | 5 ++++- os/os_posix.cpp | 11 ++++++++++- os/os_win32.cpp | 3 +++ 4 files changed, 19 insertions(+), 3 deletions(-) diff --git a/device/rocm/rocvirtual.cpp b/device/rocm/rocvirtual.cpp index d31f01982..2e62b0336 100644 --- a/device/rocm/rocvirtual.cpp +++ b/device/rocm/rocvirtual.cpp @@ -157,7 +157,8 @@ bool HsaAmdSignalHandler(hsa_signal_value_t value, void* arg) { amd::Thread* thread = amd::Thread::current(); if (!(thread != nullptr || - ((thread = new amd::HostThread()) != nullptr && thread == amd::Thread::current()))) { + ((thread = new amd::HostThread()) != nullptr && thread == amd::Thread::current() && + amd::Os::setThreadAffinityToMainThread()))) { return false; } diff --git a/os/os.hpp b/os/os.hpp index 989363fc0..811bc0691 100644 --- a/os/os.hpp +++ b/os/os.hpp @@ -121,7 +121,7 @@ class Os : AllStatic { // Given a valid file name, returns mmapped memory with the mapped size. static bool MemoryMapFile(const char* fname, const void** mmap_ptr, size_t* mmap_size); - // Given a valid file name amd mapped size, returns ftruncated mmaped memory + // Given a valid file name amd mapped size, returns ftruncated mmaped memory static bool MemoryMapFileTruncated(const char* fname, const void** mmap_ptr, size_t mmap_size); // Given a valid mmaped ptr with correct size, unmaps the ptr from memory @@ -181,6 +181,9 @@ class Os : AllStatic { static void setThreadAffinity(const void* handle, const ThreadAffinityMask& mask); //! Set the currently running thread's name. static void setCurrentThreadName(const char* name); + //! Set current threads affinity to that of main thread + static bool setThreadAffinityToMainThread(); + //! 
Check if the thread is alive static bool isThreadAlive(const Thread& osThread); diff --git a/os/os_posix.cpp b/os/os_posix.cpp index 2d5b89a58..0868dbc15 100644 --- a/os/os_posix.cpp +++ b/os/os_posix.cpp @@ -131,6 +131,7 @@ static pthread_setaffinity_fn pthread_setaffinity_fptr; static void init() __attribute__((constructor(101))); static void init() { Os::init(); } +static cpu_set_t nativeMask_; bool Os::installSigfpeHandler() { // Install a SIGFPE signal handler @todo: Chain the handlers @@ -160,6 +161,7 @@ bool Os::init() { pageSize_ = (size_t)::sysconf(_SC_PAGESIZE); processorCount_ = ::sysconf(_SC_NPROCESSORS_CONF); + pthread_getaffinity_np(pthread_self(), sizeof(cpu_set_t), &nativeMask_); pthread_setaffinity_fptr = (pthread_setaffinity_fn)dlsym(RTLD_NEXT, "pthread_setaffinity_np"); return Thread::init(); @@ -392,13 +394,20 @@ const void* Os::createOsThread(amd::Thread* thread) { return reinterpret_cast(handle); } - void Os::setThreadAffinity(const void* handle, const Os::ThreadAffinityMask& mask) { if (pthread_setaffinity_fptr != NULL) { pthread_setaffinity_fptr((pthread_t)handle, sizeof(cpu_set_t), &mask.mask_); } } +bool Os::setThreadAffinityToMainThread() { + if (AMD_CPU_AFFINITY) { + ClPrint(amd::LOG_INFO, amd::LOG_INIT, "Setting Affinity to the main thread's affinity"); + pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &nativeMask_); + } + return true; +} + void Os::yield() { ::sched_yield(); } uint64_t Os::timeNanos() { diff --git a/os/os_win32.cpp b/os/os_win32.cpp index 327bd0ecf..6b4f6aac3 100644 --- a/os/os_win32.cpp +++ b/os/os_win32.cpp @@ -341,6 +341,9 @@ void Os::setThreadAffinity(const void* handle, const Os::ThreadAffinityMask& mas } } +bool Os::setThreadAffinityToMainThread() { + return true; +} void Os::yield() { ::SwitchToThread(); } uint64_t Os::timeNanos() { From 471e665da32c3ae585411ca3389992fbe4c486fb Mon Sep 17 00:00:00 2001 From: Chauncey Hui Date: Wed, 7 Jul 2021 03:00:06 -0400 Subject: [PATCH 052/102] SWDEV-2 - Change 
OpenCL version number from 3332 to 3333 --- utils/versions.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/versions.hpp b/utils/versions.hpp index 498c519e1..90d68c0be 100644 --- a/utils/versions.hpp +++ b/utils/versions.hpp @@ -28,7 +28,7 @@ #endif // AMD_PLATFORM_NAME #ifndef AMD_PLATFORM_BUILD_NUMBER -#define AMD_PLATFORM_BUILD_NUMBER 3332 +#define AMD_PLATFORM_BUILD_NUMBER 3333 #endif // AMD_PLATFORM_BUILD_NUMBER #ifndef AMD_PLATFORM_REVISION_NUMBER From 3ed980b223d779bce4f6715d6af72ad55f98abe1 Mon Sep 17 00:00:00 2001 From: agunashe Date: Fri, 2 Jul 2021 14:41:38 -0700 Subject: [PATCH 053/102] SWDEV-293742 - Update copyright end year VDI repo Change-Id: I69d2fea4a7a43adf96ccea794270e4af991c5261 --- CMakeLists.txt | 2 +- LICENSE.txt | 2 +- cmake/FindAMD_HSA_LOADER.cmake | 2 +- cmake/FindAMD_OPENCL.cmake | 2 +- cmake/FindAMD_PAL.cmake | 2 +- cmake/FindAMD_UGL.cmake | 2 +- cmake/ROCclr.cmake | 2 +- cmake/ROCclrHSA.cmake | 2 +- cmake/ROCclrHSAIL.cmake | 2 +- cmake/ROCclrLC.cmake | 2 +- cmake/ROCclrPAL.cmake | 2 +- compiler/lib/backends/common/library.hpp | 2 +- compiler/lib/include/acl.h | 2 +- compiler/lib/include/aclDefs.h | 2 +- compiler/lib/include/aclEnums.h | 2 +- compiler/lib/include/aclFunctors.h | 2 +- compiler/lib/include/aclStructs.h | 2 +- compiler/lib/include/aclTypes.h | 2 +- compiler/lib/spirv/spirvUtils.h | 2 +- compiler/lib/utils/OPTIONS.def | 2 +- compiler/lib/utils/bif_section_labels.hpp | 2 +- compiler/lib/utils/libUtils.h | 2 +- compiler/lib/utils/options.cpp | 2 +- compiler/lib/utils/options.hpp | 2 +- device/appprofile.cpp | 2 +- device/appprofile.hpp | 2 +- device/blit.cpp | 2 +- device/blit.hpp | 2 +- device/blitcl.cpp | 2 +- device/comgrctx.cpp | 2 +- device/comgrctx.hpp | 2 +- device/devhcmessages.cpp | 2 +- device/devhcmessages.hpp | 2 +- device/devhcprintf.cpp | 2 +- device/devhostcall.cpp | 2 +- device/devhostcall.hpp | 2 +- device/device.cpp | 2 +- device/device.hpp | 2 +- device/devkernel.cpp | 2 +- 
device/devkernel.hpp | 2 +- device/devprogram.cpp | 2 +- device/devprogram.hpp | 2 +- device/devsanitizer.hpp | 2 +- device/devsignal.hpp | 2 +- device/devurilocator.hpp | 2 +- device/devwavelimiter.cpp | 2 +- device/devwavelimiter.hpp | 2 +- device/gpu/gpuappprofile.cpp | 2 +- device/gpu/gpuappprofile.hpp | 2 +- device/gpu/gpubinary.cpp | 2 +- device/gpu/gpubinary.hpp | 2 +- device/gpu/gpublit.cpp | 2 +- device/gpu/gpublit.hpp | 2 +- device/gpu/gpucompiler.cpp | 2 +- device/gpu/gpuconstbuf.cpp | 2 +- device/gpu/gpuconstbuf.hpp | 2 +- device/gpu/gpucounters.cpp | 2 +- device/gpu/gpucounters.hpp | 2 +- device/gpu/gpudebugger.hpp | 2 +- device/gpu/gpudebugmanager.cpp | 2 +- device/gpu/gpudebugmanager.hpp | 2 +- device/gpu/gpudefs.hpp | 2 +- device/gpu/gpudevice.cpp | 2 +- device/gpu/gpudevice.hpp | 2 +- device/gpu/gpukernel.cpp | 2 +- device/gpu/gpukernel.hpp | 2 +- device/gpu/gpumemory.cpp | 2 +- device/gpu/gpumemory.hpp | 2 +- device/gpu/gpuprintf.cpp | 2 +- device/gpu/gpuprintf.hpp | 2 +- device/gpu/gpuprogram.cpp | 2 +- device/gpu/gpuprogram.hpp | 2 +- device/gpu/gpuresource.cpp | 2 +- device/gpu/gpuresource.hpp | 2 +- device/gpu/gpusched.hpp | 2 +- device/gpu/gpuschedcl.cpp | 2 +- device/gpu/gpuscsi.cpp | 2 +- device/gpu/gpusettings.cpp | 2 +- device/gpu/gpusettings.hpp | 2 +- device/gpu/gputhreadtrace.cpp | 2 +- device/gpu/gputhreadtrace.hpp | 2 +- device/gpu/gputimestamp.cpp | 2 +- device/gpu/gputimestamp.hpp | 2 +- device/gpu/gpuvirtual.cpp | 2 +- device/gpu/gpuvirtual.hpp | 2 +- device/gpu/gslbe/src/rt/DxxOpenCLInteropExt.h | 2 +- device/gpu/gslbe/src/rt/EventQueue.cpp | 2 +- device/gpu/gslbe/src/rt/EventQueue.h | 2 +- device/gpu/gslbe/src/rt/GSLContext.cpp | 2 +- device/gpu/gslbe/src/rt/GSLContext.h | 2 +- device/gpu/gslbe/src/rt/GSLDevice.cpp | 2 +- device/gpu/gslbe/src/rt/GSLDevice.h | 2 +- device/gpu/gslbe/src/rt/GSLDeviceD3D10.cpp | 2 +- device/gpu/gslbe/src/rt/GSLDeviceD3D11.cpp | 2 +- device/gpu/gslbe/src/rt/GSLDeviceD3D9.cpp | 2 +- 
device/gpu/gslbe/src/rt/GSLDeviceGL.cpp | 2 +- device/gpu/gslbe/src/rt/GSLStubs.cpp | 2 +- device/gpu/gslbe/src/rt/backend.cpp | 2 +- device/gpu/gslbe/src/rt/backend.h | 2 +- device/gpu/gslbe/src/rt/caltarget.h | 2 +- device/gpu/gslbe/src/rt/inifile/ini_export.cpp | 2 +- device/gpu/gslbe/src/rt/inifile/ini_export.h | 2 +- device/gpu/gslbe/src/rt/inifile/ini_values.h | 2 +- device/gpu/gslbe/src/rt/inifile/inifile.cpp | 2 +- device/gpu/gslbe/src/rt/inifile/inifile.h | 2 +- device/gpu/gslbe/src/rt/inifile/inifile_parser.cpp | 2 +- device/gpu/gslbe/src/rt/inifile/inifile_parser.h | 2 +- device/hsailctx.cpp | 2 +- device/hsailctx.hpp | 2 +- device/hwdebug.cpp | 2 +- device/hwdebug.hpp | 2 +- device/pal/palappprofile.cpp | 2 +- device/pal/palappprofile.hpp | 2 +- device/pal/palblit.cpp | 2 +- device/pal/palblit.hpp | 2 +- device/pal/palconstbuf.cpp | 2 +- device/pal/palconstbuf.hpp | 2 +- device/pal/palcounters.cpp | 2 +- device/pal/palcounters.hpp | 2 +- device/pal/paldebugger.hpp | 2 +- device/pal/paldebugmanager.cpp | 2 +- device/pal/paldebugmanager.hpp | 2 +- device/pal/paldefs.hpp | 2 +- device/pal/paldevice.cpp | 2 +- device/pal/paldevice.hpp | 2 +- device/pal/paldeviced3d10.cpp | 2 +- device/pal/paldeviced3d11.cpp | 2 +- device/pal/paldeviced3d9.cpp | 2 +- device/pal/paldevicegl.cpp | 2 +- device/pal/palgpuopen.cpp | 2 +- device/pal/palgpuopen.hpp | 2 +- device/pal/palkernel.cpp | 2 +- device/pal/palkernel.hpp | 2 +- device/pal/palmemory.cpp | 2 +- device/pal/palmemory.hpp | 2 +- device/pal/palprintf.cpp | 2 +- device/pal/palprintf.hpp | 2 +- device/pal/palprogram.cpp | 2 +- device/pal/palprogram.hpp | 2 +- device/pal/palresource.cpp | 2 +- device/pal/palresource.hpp | 2 +- device/pal/palsched.hpp | 2 +- device/pal/palschedcl.cpp | 2 +- device/pal/palsettings.cpp | 2 +- device/pal/palsettings.hpp | 2 +- device/pal/palsignal.cpp | 2 +- device/pal/palsignal.hpp | 2 +- device/pal/palthreadtrace.cpp | 2 +- device/pal/palthreadtrace.hpp | 2 +- 
device/pal/paltimestamp.cpp | 2 +- device/pal/paltimestamp.hpp | 2 +- device/pal/paltrap.hpp | 2 +- device/pal/palvirtual.cpp | 2 +- device/pal/palvirtual.hpp | 2 +- device/rocm/mesa_glinterop.h | 2 +- device/rocm/pro/lnxheaders.h | 2 +- device/rocm/pro/prodevice.cpp | 2 +- device/rocm/pro/prodevice.hpp | 2 +- device/rocm/pro/prodriver.hpp | 2 +- device/rocm/pro/profuncs.hpp | 2 +- device/rocm/rocappprofile.cpp | 2 +- device/rocm/rocappprofile.hpp | 2 +- device/rocm/rocblit.cpp | 2 +- device/rocm/rocblit.hpp | 2 +- device/rocm/roccounters.cpp | 2 +- device/rocm/roccounters.hpp | 2 +- device/rocm/rocdefs.hpp | 2 +- device/rocm/rocdevice.cpp | 2 +- device/rocm/rocdevice.hpp | 2 +- device/rocm/rocglinterop.cpp | 2 +- device/rocm/rocglinterop.hpp | 2 +- device/rocm/rockernel.cpp | 2 +- device/rocm/rockernel.hpp | 2 +- device/rocm/rocmemory.cpp | 2 +- device/rocm/rocmemory.hpp | 2 +- device/rocm/rocprintf.cpp | 2 +- device/rocm/rocprintf.hpp | 2 +- device/rocm/rocprogram.cpp | 2 +- device/rocm/rocprogram.hpp | 2 +- device/rocm/rocregisters.hpp | 2 +- device/rocm/rocsched.hpp | 2 +- device/rocm/rocschedcl.cpp | 2 +- device/rocm/rocsettings.cpp | 2 +- device/rocm/rocsettings.hpp | 2 +- device/rocm/rocsignal.cpp | 2 +- device/rocm/rocsignal.hpp | 2 +- device/rocm/rocurilocator.cpp | 2 +- device/rocm/rocurilocator.hpp | 2 +- device/rocm/rocvirtual.cpp | 2 +- device/rocm/rocvirtual.hpp | 2 +- elf/elf.cpp | 2 +- elf/elf.hpp | 2 +- elf/elfio/elf_types.hpp | 2 +- elf/elfio/elfio.hpp | 2 +- elf/elfio/elfio_dump.hpp | 2 +- elf/elfio/elfio_dynamic.hpp | 2 +- elf/elfio/elfio_header.hpp | 2 +- elf/elfio/elfio_note.hpp | 2 +- elf/elfio/elfio_relocation.hpp | 2 +- elf/elfio/elfio_section.hpp | 2 +- elf/elfio/elfio_segment.hpp | 2 +- elf/elfio/elfio_strings.hpp | 2 +- elf/elfio/elfio_symbols.hpp | 2 +- elf/elfio/elfio_utils.hpp | 2 +- elf/test/CMakeLists.txt | 2 +- elf/test/main.cpp | 2 +- include/top.hpp | 2 +- include/vdi_agent_amd.h | 2 +- include/vdi_common.hpp | 2 +- os/alloc.cpp 
| 2 +- os/alloc.hpp | 2 +- os/os.cpp | 2 +- os/os.hpp | 2 +- os/os_posix.cpp | 2 +- os/os_win32.cpp | 2 +- os/setjmp.S | 2 +- os/setjmp.asm | 2 +- platform/activity.cpp | 2 +- platform/activity.hpp | 2 +- platform/agent.cpp | 2 +- platform/agent.hpp | 2 +- platform/command.cpp | 2 +- platform/command.hpp | 2 +- platform/command_utils.hpp | 2 +- platform/commandqueue.cpp | 2 +- platform/commandqueue.hpp | 2 +- platform/context.cpp | 2 +- platform/context.hpp | 2 +- platform/counter.hpp | 2 +- platform/interop.hpp | 2 +- platform/kernel.cpp | 2 +- platform/kernel.hpp | 2 +- platform/memory.cpp | 2 +- platform/memory.hpp | 2 +- platform/ndrange.cpp | 2 +- platform/ndrange.hpp | 2 +- platform/object.hpp | 2 +- platform/perfctr.hpp | 2 +- platform/prof_protocol.h | 2 +- platform/program.cpp | 2 +- platform/program.hpp | 2 +- platform/runtime.cpp | 2 +- platform/runtime.hpp | 2 +- platform/sampler.hpp | 2 +- platform/threadtrace.hpp | 2 +- thread/monitor.cpp | 2 +- thread/monitor.hpp | 2 +- thread/semaphore.cpp | 2 +- thread/semaphore.hpp | 2 +- thread/thread.cpp | 2 +- thread/thread.hpp | 2 +- utils/concurrent.hpp | 2 +- utils/debug.cpp | 2 +- utils/debug.hpp | 2 +- utils/flags.cpp | 2 +- utils/flags.hpp | 2 +- utils/macros.hpp | 2 +- utils/util.hpp | 2 +- utils/versions.hpp | 2 +- 259 files changed, 259 insertions(+), 259 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3150fc200..29ed85f88 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (C) 2017-2021 Advanced Micro Devices, Inc. All Rights Reserved. +# Copyright (c) 2017 - 2021 Advanced Micro Devices, Inc. All Rights Reserved. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/LICENSE.txt b/LICENSE.txt index 9a1f87d2c..57378c669 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1,4 +1,4 @@ -Copyright (c) 2008-2021 Advanced Micro Devices, Inc. 
+Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/cmake/FindAMD_HSA_LOADER.cmake b/cmake/FindAMD_HSA_LOADER.cmake index 35f682457..05c685462 100644 --- a/cmake/FindAMD_HSA_LOADER.cmake +++ b/cmake/FindAMD_HSA_LOADER.cmake @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2021 Advanced Micro Devices, Inc. All rights reserved. +# Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/cmake/FindAMD_OPENCL.cmake b/cmake/FindAMD_OPENCL.cmake index 58b464575..bae08a70c 100644 --- a/cmake/FindAMD_OPENCL.cmake +++ b/cmake/FindAMD_OPENCL.cmake @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2021 Advanced Micro Devices, Inc. All rights reserved. +# Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/cmake/FindAMD_PAL.cmake b/cmake/FindAMD_PAL.cmake index ef19c0557..557444458 100644 --- a/cmake/FindAMD_PAL.cmake +++ b/cmake/FindAMD_PAL.cmake @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2021 Advanced Micro Devices, Inc. All rights reserved. +# Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/cmake/FindAMD_UGL.cmake b/cmake/FindAMD_UGL.cmake index d30520434..8548fec54 100644 --- a/cmake/FindAMD_UGL.cmake +++ b/cmake/FindAMD_UGL.cmake @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2021 Advanced Micro Devices, Inc. All rights reserved. 
+# Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/cmake/ROCclr.cmake b/cmake/ROCclr.cmake index 41903515d..4b4c27351 100644 --- a/cmake/ROCclr.cmake +++ b/cmake/ROCclr.cmake @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2021 Advanced Micro Devices, Inc. All rights reserved. +# Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/cmake/ROCclrHSA.cmake b/cmake/ROCclrHSA.cmake index bb0ddc0c1..fb127ed68 100644 --- a/cmake/ROCclrHSA.cmake +++ b/cmake/ROCclrHSA.cmake @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2021 Advanced Micro Devices, Inc. All rights reserved. +# Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/cmake/ROCclrHSAIL.cmake b/cmake/ROCclrHSAIL.cmake index 82b80ee27..24af48413 100644 --- a/cmake/ROCclrHSAIL.cmake +++ b/cmake/ROCclrHSAIL.cmake @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2021 Advanced Micro Devices, Inc. All rights reserved. +# Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/cmake/ROCclrLC.cmake b/cmake/ROCclrLC.cmake index 669b96ab7..cb78fa241 100644 --- a/cmake/ROCclrLC.cmake +++ b/cmake/ROCclrLC.cmake @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2021 Advanced Micro Devices, Inc. All rights reserved. +# Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
# # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/cmake/ROCclrPAL.cmake b/cmake/ROCclrPAL.cmake index 5b4e8f623..304a42566 100644 --- a/cmake/ROCclrPAL.cmake +++ b/cmake/ROCclrPAL.cmake @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2021 Advanced Micro Devices, Inc. All rights reserved. +# Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/compiler/lib/backends/common/library.hpp b/compiler/lib/backends/common/library.hpp index e2d94ecf5..2518dc5c1 100644 --- a/compiler/lib/backends/common/library.hpp +++ b/compiler/lib/backends/common/library.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/compiler/lib/include/acl.h b/compiler/lib/include/acl.h index d95518e01..30e31090d 100644 --- a/compiler/lib/include/acl.h +++ b/compiler/lib/include/acl.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2012-present Advanced Micro Devices, Inc. +/* Copyright (c) 2012 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/compiler/lib/include/aclDefs.h b/compiler/lib/include/aclDefs.h index a0a447160..eac617c50 100644 --- a/compiler/lib/include/aclDefs.h +++ b/compiler/lib/include/aclDefs.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2011-present Advanced Micro Devices, Inc. +/* Copyright (c) 2011 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/compiler/lib/include/aclEnums.h b/compiler/lib/include/aclEnums.h index 106415b45..c4aabf030 100644 --- a/compiler/lib/include/aclEnums.h +++ b/compiler/lib/include/aclEnums.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2012-present Advanced Micro Devices, Inc. +/* Copyright (c) 2012 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/compiler/lib/include/aclFunctors.h b/compiler/lib/include/aclFunctors.h index ab8a862c1..721fd6376 100644 --- a/compiler/lib/include/aclFunctors.h +++ b/compiler/lib/include/aclFunctors.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2012-present Advanced Micro Devices, Inc. +/* Copyright (c) 2012 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/compiler/lib/include/aclStructs.h b/compiler/lib/include/aclStructs.h index e38bdee32..2bfc6afb8 100644 --- a/compiler/lib/include/aclStructs.h +++ b/compiler/lib/include/aclStructs.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2012-present Advanced Micro Devices, Inc. +/* Copyright (c) 2012 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/compiler/lib/include/aclTypes.h b/compiler/lib/include/aclTypes.h index f2803b9b2..433829710 100644 --- a/compiler/lib/include/aclTypes.h +++ b/compiler/lib/include/aclTypes.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2012-present Advanced Micro Devices, Inc. +/* Copyright (c) 2012 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/compiler/lib/spirv/spirvUtils.h b/compiler/lib/spirv/spirvUtils.h index e5d701b63..0a4a562c4 100644 --- a/compiler/lib/spirv/spirvUtils.h +++ b/compiler/lib/spirv/spirvUtils.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/compiler/lib/utils/OPTIONS.def b/compiler/lib/utils/OPTIONS.def index 46fbbaae2..005465c58 100644 --- a/compiler/lib/utils/OPTIONS.def +++ b/compiler/lib/utils/OPTIONS.def @@ -1,4 +1,4 @@ -/* Copyright (c) 2010-present Advanced Micro Devices, Inc. +/* Copyright (c) 2010 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/compiler/lib/utils/bif_section_labels.hpp b/compiler/lib/utils/bif_section_labels.hpp index a5322e94f..873c73032 100644 --- a/compiler/lib/utils/bif_section_labels.hpp +++ b/compiler/lib/utils/bif_section_labels.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2012-present Advanced Micro Devices, Inc. +/* Copyright (c) 2012 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/compiler/lib/utils/libUtils.h b/compiler/lib/utils/libUtils.h index aa383f396..9e4e47b0b 100644 --- a/compiler/lib/utils/libUtils.h +++ b/compiler/lib/utils/libUtils.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2011-present Advanced Micro Devices, Inc. +/* Copyright (c) 2011 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/compiler/lib/utils/options.cpp b/compiler/lib/utils/options.cpp index b90bb1aa6..3127338a6 100644 --- a/compiler/lib/utils/options.cpp +++ b/compiler/lib/utils/options.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/compiler/lib/utils/options.hpp b/compiler/lib/utils/options.hpp index 25b15a53a..30442fe87 100644 --- a/compiler/lib/utils/options.hpp +++ b/compiler/lib/utils/options.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2009-present Advanced Micro Devices, Inc. +/* Copyright (c) 2009 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/appprofile.cpp b/device/appprofile.cpp index 4e7930ec5..63fae78c9 100644 --- a/device/appprofile.cpp +++ b/device/appprofile.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2014-present Advanced Micro Devices, Inc. +/* Copyright (c) 2014 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/appprofile.hpp b/device/appprofile.hpp index ca82ea90a..f44535952 100644 --- a/device/appprofile.hpp +++ b/device/appprofile.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2014-present Advanced Micro Devices, Inc. +/* Copyright (c) 2014 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/blit.cpp b/device/blit.cpp index 15eeb8902..f2cbd0763 100644 --- a/device/blit.cpp +++ b/device/blit.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2010-present Advanced Micro Devices, Inc. +/* Copyright (c) 2010 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/blit.hpp b/device/blit.hpp index b87f3d854..8ffd6d85c 100644 --- a/device/blit.hpp +++ b/device/blit.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2010-present Advanced Micro Devices, Inc. +/* Copyright (c) 2010 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/blitcl.cpp b/device/blitcl.cpp index c328c30c3..1520a4fb5 100644 --- a/device/blitcl.cpp +++ b/device/blitcl.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2010-present Advanced Micro Devices, Inc. +/* Copyright (c) 2010 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/comgrctx.cpp b/device/comgrctx.cpp index de195357c..5a2406738 100644 --- a/device/comgrctx.cpp +++ b/device/comgrctx.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/comgrctx.hpp b/device/comgrctx.hpp index 3399b4eeb..cd74917c2 100644 --- a/device/comgrctx.hpp +++ b/device/comgrctx.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/devhcmessages.cpp b/device/devhcmessages.cpp index 7b57c0a2f..c8b9c0df2 100644 --- a/device/devhcmessages.cpp +++ b/device/devhcmessages.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2020-present Advanced Micro Devices, Inc. +/* Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/devhcmessages.hpp b/device/devhcmessages.hpp index 4d0f877a9..fc009c613 100644 --- a/device/devhcmessages.hpp +++ b/device/devhcmessages.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2020-present Advanced Micro Devices, Inc. +/* Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/devhcprintf.cpp b/device/devhcprintf.cpp index ea5adcbe8..40078db37 100644 --- a/device/devhcprintf.cpp +++ b/device/devhcprintf.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2020-present Advanced Micro Devices, Inc. +/* Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/devhostcall.cpp b/device/devhostcall.cpp index 5b7517b22..a73c4451b 100644 --- a/device/devhostcall.cpp +++ b/device/devhostcall.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2019-present Advanced Micro Devices, Inc. +/* Copyright (c) 2019 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/devhostcall.hpp b/device/devhostcall.hpp index ae2047c33..f529c1020 100644 --- a/device/devhostcall.hpp +++ b/device/devhostcall.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2019-present Advanced Micro Devices, Inc. +/* Copyright (c) 2019 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/device.cpp b/device/device.cpp index 07dbbb49c..2b6b81995 100644 --- a/device/device.cpp +++ b/device/device.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/device.hpp b/device/device.hpp index 69eb60a2d..120e03ea7 100644 --- a/device/device.hpp +++ b/device/device.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/devkernel.cpp b/device/devkernel.cpp index c8079ec9e..2fd4a9ed7 100644 --- a/device/devkernel.cpp +++ b/device/devkernel.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/devkernel.hpp b/device/devkernel.hpp index 155b219a6..7fa0e8ba6 100644 --- a/device/devkernel.hpp +++ b/device/devkernel.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/devprogram.cpp b/device/devprogram.cpp index b0e8292a5..279691d0f 100644 --- a/device/devprogram.cpp +++ b/device/devprogram.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/devprogram.hpp b/device/devprogram.hpp index be6176633..cd2a3c74f 100644 --- a/device/devprogram.hpp +++ b/device/devprogram.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/devsanitizer.hpp b/device/devsanitizer.hpp index 5483b40ba..beabe6078 100644 --- a/device/devsanitizer.hpp +++ b/device/devsanitizer.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2021-present Advanced Micro Devices, Inc. +/* Copyright (c) 2021 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/devsignal.hpp b/device/devsignal.hpp index 3ce7b0766..20850c9d2 100644 --- a/device/devsignal.hpp +++ b/device/devsignal.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2021-present Advanced Micro Devices, Inc. +/* Copyright (c) 2021 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/devurilocator.hpp b/device/devurilocator.hpp index dcb31b5de..730d7a2ff 100644 --- a/device/devurilocator.hpp +++ b/device/devurilocator.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2019-present Advanced Micro Devices, Inc. +/* Copyright (c) 2019 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/devwavelimiter.cpp b/device/devwavelimiter.cpp index c79d206d6..4180cfdac 100644 --- a/device/devwavelimiter.cpp +++ b/device/devwavelimiter.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/devwavelimiter.hpp b/device/devwavelimiter.hpp index 394c2cef3..f4c29039a 100644 --- a/device/devwavelimiter.hpp +++ b/device/devwavelimiter.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gpuappprofile.cpp b/device/gpu/gpuappprofile.cpp index 233c00783..417d1a1b2 100644 --- a/device/gpu/gpuappprofile.cpp +++ b/device/gpu/gpuappprofile.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2014-present Advanced Micro Devices, Inc. +/* Copyright (c) 2014 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gpuappprofile.hpp b/device/gpu/gpuappprofile.hpp index 72b5a5622..35d3f79ac 100644 --- a/device/gpu/gpuappprofile.hpp +++ b/device/gpu/gpuappprofile.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2014-present Advanced Micro Devices, Inc. +/* Copyright (c) 2014 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gpubinary.cpp b/device/gpu/gpubinary.cpp index 69b89a34d..48a3e2b51 100644 --- a/device/gpu/gpubinary.cpp +++ b/device/gpu/gpubinary.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2009-present Advanced Micro Devices, Inc. +/* Copyright (c) 2009 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gpubinary.hpp b/device/gpu/gpubinary.hpp index 2577ebe40..6a84035bd 100644 --- a/device/gpu/gpubinary.hpp +++ b/device/gpu/gpubinary.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2009-present Advanced Micro Devices, Inc. +/* Copyright (c) 2009 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gpublit.cpp b/device/gpu/gpublit.cpp index 32cd4985d..26bd37e9b 100644 --- a/device/gpu/gpublit.cpp +++ b/device/gpu/gpublit.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2010-present Advanced Micro Devices, Inc. +/* Copyright (c) 2010 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gpublit.hpp b/device/gpu/gpublit.hpp index 913f85097..045883162 100644 --- a/device/gpu/gpublit.hpp +++ b/device/gpu/gpublit.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2010-present Advanced Micro Devices, Inc. +/* Copyright (c) 2010 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gpucompiler.cpp b/device/gpu/gpucompiler.cpp index 3597aef40..540620350 100644 --- a/device/gpu/gpucompiler.cpp +++ b/device/gpu/gpucompiler.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gpuconstbuf.cpp b/device/gpu/gpuconstbuf.cpp index a49808ba6..03039d473 100644 --- a/device/gpu/gpuconstbuf.cpp +++ b/device/gpu/gpuconstbuf.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2010-present Advanced Micro Devices, Inc. +/* Copyright (c) 2010 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gpuconstbuf.hpp b/device/gpu/gpuconstbuf.hpp index c4cbb1340..83a3adad5 100644 --- a/device/gpu/gpuconstbuf.hpp +++ b/device/gpu/gpuconstbuf.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2010-present Advanced Micro Devices, Inc. +/* Copyright (c) 2010 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gpucounters.cpp b/device/gpu/gpucounters.cpp index 77db45a1e..95e77dc2e 100644 --- a/device/gpu/gpucounters.cpp +++ b/device/gpu/gpucounters.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gpucounters.hpp b/device/gpu/gpucounters.hpp index 8cb50ceac..f0500008f 100644 --- a/device/gpu/gpucounters.hpp +++ b/device/gpu/gpucounters.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2009-present Advanced Micro Devices, Inc. +/* Copyright (c) 2009 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gpudebugger.hpp b/device/gpu/gpudebugger.hpp index e0a28494b..e37546355 100644 --- a/device/gpu/gpudebugger.hpp +++ b/device/gpu/gpudebugger.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2014-present Advanced Micro Devices, Inc. +/* Copyright (c) 2014 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gpudebugmanager.cpp b/device/gpu/gpudebugmanager.cpp index 1a84353bc..48ea263e7 100644 --- a/device/gpu/gpudebugmanager.cpp +++ b/device/gpu/gpudebugmanager.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2014-present Advanced Micro Devices, Inc. +/* Copyright (c) 2014 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gpudebugmanager.hpp b/device/gpu/gpudebugmanager.hpp index 61bdda86a..18e6ad336 100644 --- a/device/gpu/gpudebugmanager.hpp +++ b/device/gpu/gpudebugmanager.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2014-present Advanced Micro Devices, Inc. +/* Copyright (c) 2014 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gpudefs.hpp b/device/gpu/gpudefs.hpp index d5e4f7dbc..6bf8f4a97 100644 --- a/device/gpu/gpudefs.hpp +++ b/device/gpu/gpudefs.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2009-present Advanced Micro Devices, Inc. +/* Copyright (c) 2009 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gpudevice.cpp b/device/gpu/gpudevice.cpp index 3eb7b7147..69694207a 100644 --- a/device/gpu/gpudevice.cpp +++ b/device/gpu/gpudevice.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gpudevice.hpp b/device/gpu/gpudevice.hpp index 1ee64f52d..5f15d8814 100644 --- a/device/gpu/gpudevice.hpp +++ b/device/gpu/gpudevice.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2009-present Advanced Micro Devices, Inc. +/* Copyright (c) 2009 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gpukernel.cpp b/device/gpu/gpukernel.cpp index 529ea5414..33d3da97f 100644 --- a/device/gpu/gpukernel.cpp +++ b/device/gpu/gpukernel.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gpukernel.hpp b/device/gpu/gpukernel.hpp index 20353441e..f637f9f87 100644 --- a/device/gpu/gpukernel.hpp +++ b/device/gpu/gpukernel.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gpumemory.cpp b/device/gpu/gpumemory.cpp index c45da0bf1..185ded8ab 100644 --- a/device/gpu/gpumemory.cpp +++ b/device/gpu/gpumemory.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gpumemory.hpp b/device/gpu/gpumemory.hpp index 9354900d9..adf7f4f3a 100644 --- a/device/gpu/gpumemory.hpp +++ b/device/gpu/gpumemory.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2009-present Advanced Micro Devices, Inc. +/* Copyright (c) 2009 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gpuprintf.cpp b/device/gpu/gpuprintf.cpp index 157d0c799..e049ecb38 100644 --- a/device/gpu/gpuprintf.cpp +++ b/device/gpu/gpuprintf.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2010-present Advanced Micro Devices, Inc. +/* Copyright (c) 2010 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gpuprintf.hpp b/device/gpu/gpuprintf.hpp index bbab1bf1b..2ef52079f 100644 --- a/device/gpu/gpuprintf.hpp +++ b/device/gpu/gpuprintf.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2010-present Advanced Micro Devices, Inc. +/* Copyright (c) 2010 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gpuprogram.cpp b/device/gpu/gpuprogram.cpp index 33fbe434c..fac1a59c1 100644 --- a/device/gpu/gpuprogram.cpp +++ b/device/gpu/gpuprogram.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gpuprogram.hpp b/device/gpu/gpuprogram.hpp index f4924d656..fe7026df4 100644 --- a/device/gpu/gpuprogram.hpp +++ b/device/gpu/gpuprogram.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gpuresource.cpp b/device/gpu/gpuresource.cpp index d4f19ba94..490a21e28 100644 --- a/device/gpu/gpuresource.cpp +++ b/device/gpu/gpuresource.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gpuresource.hpp b/device/gpu/gpuresource.hpp index 6f81357b2..52a1811c7 100644 --- a/device/gpu/gpuresource.hpp +++ b/device/gpu/gpuresource.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gpusched.hpp b/device/gpu/gpusched.hpp index 2414bde93..2ebc9e875 100644 --- a/device/gpu/gpusched.hpp +++ b/device/gpu/gpusched.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2010-present Advanced Micro Devices, Inc. +/* Copyright (c) 2010 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gpuschedcl.cpp b/device/gpu/gpuschedcl.cpp index 3549225d3..55f9394d5 100644 --- a/device/gpu/gpuschedcl.cpp +++ b/device/gpu/gpuschedcl.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2010-present Advanced Micro Devices, Inc. +/* Copyright (c) 2010 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gpuscsi.cpp b/device/gpu/gpuscsi.cpp index 748260235..a07122af2 100644 --- a/device/gpu/gpuscsi.cpp +++ b/device/gpu/gpuscsi.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gpusettings.cpp b/device/gpu/gpusettings.cpp index 8955e944a..5251a576d 100644 --- a/device/gpu/gpusettings.cpp +++ b/device/gpu/gpusettings.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2010-present Advanced Micro Devices, Inc. +/* Copyright (c) 2010 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gpusettings.hpp b/device/gpu/gpusettings.hpp index c8f1e900e..b51f8aa2b 100644 --- a/device/gpu/gpusettings.hpp +++ b/device/gpu/gpusettings.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2010-present Advanced Micro Devices, Inc. +/* Copyright (c) 2010 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gputhreadtrace.cpp b/device/gpu/gputhreadtrace.cpp index 3690589a4..95faa038e 100644 --- a/device/gpu/gputhreadtrace.cpp +++ b/device/gpu/gputhreadtrace.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gputhreadtrace.hpp b/device/gpu/gputhreadtrace.hpp index dde87ad18..4b3789560 100644 --- a/device/gpu/gputhreadtrace.hpp +++ b/device/gpu/gputhreadtrace.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2009-present Advanced Micro Devices, Inc. +/* Copyright (c) 2009 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gputimestamp.cpp b/device/gpu/gputimestamp.cpp index ee0bb5b85..a21c655d5 100644 --- a/device/gpu/gputimestamp.cpp +++ b/device/gpu/gputimestamp.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gputimestamp.hpp b/device/gpu/gputimestamp.hpp index 8c37ab8d0..007c9d59c 100644 --- a/device/gpu/gputimestamp.hpp +++ b/device/gpu/gputimestamp.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gpuvirtual.cpp b/device/gpu/gpuvirtual.cpp index f84cfadaf..5073e34f0 100644 --- a/device/gpu/gpuvirtual.cpp +++ b/device/gpu/gpuvirtual.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gpuvirtual.hpp b/device/gpu/gpuvirtual.hpp index a4700f91b..29db28791 100644 --- a/device/gpu/gpuvirtual.hpp +++ b/device/gpu/gpuvirtual.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gslbe/src/rt/DxxOpenCLInteropExt.h b/device/gpu/gslbe/src/rt/DxxOpenCLInteropExt.h index d1ae91a45..abdcbd9ae 100644 --- a/device/gpu/gslbe/src/rt/DxxOpenCLInteropExt.h +++ b/device/gpu/gslbe/src/rt/DxxOpenCLInteropExt.h @@ -1,4 +1,4 @@ - /* Copyright (c) 2008-present Advanced Micro Devices, Inc. + /* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gslbe/src/rt/EventQueue.cpp b/device/gpu/gslbe/src/rt/EventQueue.cpp index d6371c861..6cbd1fa78 100644 --- a/device/gpu/gslbe/src/rt/EventQueue.cpp +++ b/device/gpu/gslbe/src/rt/EventQueue.cpp @@ -1,4 +1,4 @@ - /* Copyright (c) 2008-present Advanced Micro Devices, Inc. + /* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gslbe/src/rt/EventQueue.h b/device/gpu/gslbe/src/rt/EventQueue.h index 7b4f9055a..4d90af96a 100644 --- a/device/gpu/gslbe/src/rt/EventQueue.h +++ b/device/gpu/gslbe/src/rt/EventQueue.h @@ -1,4 +1,4 @@ - /* Copyright (c) 2008-present Advanced Micro Devices, Inc. + /* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gslbe/src/rt/GSLContext.cpp b/device/gpu/gslbe/src/rt/GSLContext.cpp index e28f87e5f..188498256 100644 --- a/device/gpu/gslbe/src/rt/GSLContext.cpp +++ b/device/gpu/gslbe/src/rt/GSLContext.cpp @@ -1,4 +1,4 @@ - /* Copyright (c) 2008-present Advanced Micro Devices, Inc. + /* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gslbe/src/rt/GSLContext.h b/device/gpu/gslbe/src/rt/GSLContext.h index f7242f067..5d3d84f36 100644 --- a/device/gpu/gslbe/src/rt/GSLContext.h +++ b/device/gpu/gslbe/src/rt/GSLContext.h @@ -1,4 +1,4 @@ - /* Copyright (c) 2008-present Advanced Micro Devices, Inc. 
+ /* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gslbe/src/rt/GSLDevice.cpp b/device/gpu/gslbe/src/rt/GSLDevice.cpp index 4b6784f89..eb24042ff 100644 --- a/device/gpu/gslbe/src/rt/GSLDevice.cpp +++ b/device/gpu/gslbe/src/rt/GSLDevice.cpp @@ -1,4 +1,4 @@ - /* Copyright (c) 2008-present Advanced Micro Devices, Inc. + /* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gslbe/src/rt/GSLDevice.h b/device/gpu/gslbe/src/rt/GSLDevice.h index 63dfb5731..d67bad8d5 100644 --- a/device/gpu/gslbe/src/rt/GSLDevice.h +++ b/device/gpu/gslbe/src/rt/GSLDevice.h @@ -1,4 +1,4 @@ - /* Copyright (c) 2008-present Advanced Micro Devices, Inc. + /* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gslbe/src/rt/GSLDeviceD3D10.cpp b/device/gpu/gslbe/src/rt/GSLDeviceD3D10.cpp index 197c5bf83..e7462f12d 100644 --- a/device/gpu/gslbe/src/rt/GSLDeviceD3D10.cpp +++ b/device/gpu/gslbe/src/rt/GSLDeviceD3D10.cpp @@ -1,4 +1,4 @@ - /* Copyright (c) 2008-present Advanced Micro Devices, Inc. + /* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gslbe/src/rt/GSLDeviceD3D11.cpp b/device/gpu/gslbe/src/rt/GSLDeviceD3D11.cpp index e2a2c875e..b2c357835 100644 --- a/device/gpu/gslbe/src/rt/GSLDeviceD3D11.cpp +++ b/device/gpu/gslbe/src/rt/GSLDeviceD3D11.cpp @@ -1,4 +1,4 @@ - /* Copyright (c) 2008-present Advanced Micro Devices, Inc. + /* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gslbe/src/rt/GSLDeviceD3D9.cpp b/device/gpu/gslbe/src/rt/GSLDeviceD3D9.cpp index b7bafbad7..dc049ecdf 100644 --- a/device/gpu/gslbe/src/rt/GSLDeviceD3D9.cpp +++ b/device/gpu/gslbe/src/rt/GSLDeviceD3D9.cpp @@ -1,4 +1,4 @@ - /* Copyright (c) 2008-present Advanced Micro Devices, Inc. + /* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gslbe/src/rt/GSLDeviceGL.cpp b/device/gpu/gslbe/src/rt/GSLDeviceGL.cpp index 42f576a46..22dcc41c9 100644 --- a/device/gpu/gslbe/src/rt/GSLDeviceGL.cpp +++ b/device/gpu/gslbe/src/rt/GSLDeviceGL.cpp @@ -1,4 +1,4 @@ - /* Copyright (c) 2008-present Advanced Micro Devices, Inc. + /* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gslbe/src/rt/GSLStubs.cpp b/device/gpu/gslbe/src/rt/GSLStubs.cpp index 205226d2e..731023d1d 100644 --- a/device/gpu/gslbe/src/rt/GSLStubs.cpp +++ b/device/gpu/gslbe/src/rt/GSLStubs.cpp @@ -1,4 +1,4 @@ - /* Copyright (c) 2008-present Advanced Micro Devices, Inc. 
+ /* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gslbe/src/rt/backend.cpp b/device/gpu/gslbe/src/rt/backend.cpp index 531ffa017..6aea71353 100644 --- a/device/gpu/gslbe/src/rt/backend.cpp +++ b/device/gpu/gslbe/src/rt/backend.cpp @@ -1,4 +1,4 @@ - /* Copyright (c) 2008-present Advanced Micro Devices, Inc. + /* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gslbe/src/rt/backend.h b/device/gpu/gslbe/src/rt/backend.h index 90215e78e..5970aff64 100644 --- a/device/gpu/gslbe/src/rt/backend.h +++ b/device/gpu/gslbe/src/rt/backend.h @@ -1,4 +1,4 @@ - /* Copyright (c) 2008-present Advanced Micro Devices, Inc. + /* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gslbe/src/rt/caltarget.h b/device/gpu/gslbe/src/rt/caltarget.h index 3f0b3ded8..2f5b0fb6c 100644 --- a/device/gpu/gslbe/src/rt/caltarget.h +++ b/device/gpu/gslbe/src/rt/caltarget.h @@ -1,4 +1,4 @@ - /* Copyright (c) 2008-present Advanced Micro Devices, Inc. + /* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gslbe/src/rt/inifile/ini_export.cpp b/device/gpu/gslbe/src/rt/inifile/ini_export.cpp index 9e8c24fab..757ffef8c 100644 --- a/device/gpu/gslbe/src/rt/inifile/ini_export.cpp +++ b/device/gpu/gslbe/src/rt/inifile/ini_export.cpp @@ -1,4 +1,4 @@ - /* Copyright (c) 2008-present Advanced Micro Devices, Inc. + /* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gslbe/src/rt/inifile/ini_export.h b/device/gpu/gslbe/src/rt/inifile/ini_export.h index 33ecbd6f3..43f489fce 100644 --- a/device/gpu/gslbe/src/rt/inifile/ini_export.h +++ b/device/gpu/gslbe/src/rt/inifile/ini_export.h @@ -1,4 +1,4 @@ - /* Copyright (c) 2008-present Advanced Micro Devices, Inc. + /* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gslbe/src/rt/inifile/ini_values.h b/device/gpu/gslbe/src/rt/inifile/ini_values.h index 70ed3c982..289fb8a6f 100644 --- a/device/gpu/gslbe/src/rt/inifile/ini_values.h +++ b/device/gpu/gslbe/src/rt/inifile/ini_values.h @@ -1,4 +1,4 @@ - /* Copyright (c) 2008-present Advanced Micro Devices, Inc. + /* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gslbe/src/rt/inifile/inifile.cpp b/device/gpu/gslbe/src/rt/inifile/inifile.cpp index e1802ad4e..30c15dc16 100644 --- a/device/gpu/gslbe/src/rt/inifile/inifile.cpp +++ b/device/gpu/gslbe/src/rt/inifile/inifile.cpp @@ -1,4 +1,4 @@ - /* Copyright (c) 2008-present Advanced Micro Devices, Inc. + /* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gslbe/src/rt/inifile/inifile.h b/device/gpu/gslbe/src/rt/inifile/inifile.h index cf8231f4b..a44d36d9a 100644 --- a/device/gpu/gslbe/src/rt/inifile/inifile.h +++ b/device/gpu/gslbe/src/rt/inifile/inifile.h @@ -1,4 +1,4 @@ - /* Copyright (c) 2008-present Advanced Micro Devices, Inc. + /* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gslbe/src/rt/inifile/inifile_parser.cpp b/device/gpu/gslbe/src/rt/inifile/inifile_parser.cpp index ff372688e..04a8f4247 100644 --- a/device/gpu/gslbe/src/rt/inifile/inifile_parser.cpp +++ b/device/gpu/gslbe/src/rt/inifile/inifile_parser.cpp @@ -1,4 +1,4 @@ - /* Copyright (c) 2008-present Advanced Micro Devices, Inc. + /* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/gpu/gslbe/src/rt/inifile/inifile_parser.h b/device/gpu/gslbe/src/rt/inifile/inifile_parser.h index 210ec86f8..d8faad0fe 100644 --- a/device/gpu/gslbe/src/rt/inifile/inifile_parser.h +++ b/device/gpu/gslbe/src/rt/inifile/inifile_parser.h @@ -1,4 +1,4 @@ - /* Copyright (c) 2008-present Advanced Micro Devices, Inc. + /* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/hsailctx.cpp b/device/hsailctx.cpp index d8cfe0fd0..fccfda35c 100644 --- a/device/hsailctx.cpp +++ b/device/hsailctx.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2021-present Advanced Micro Devices, Inc. +/* Copyright (c) 2021 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/hsailctx.hpp b/device/hsailctx.hpp index ced65c7da..1b4144838 100644 --- a/device/hsailctx.hpp +++ b/device/hsailctx.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2021-present Advanced Micro Devices, Inc. +/* Copyright (c) 2021 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/hwdebug.cpp b/device/hwdebug.cpp index b0397c368..85e38d74f 100644 --- a/device/hwdebug.cpp +++ b/device/hwdebug.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2014-present Advanced Micro Devices, Inc. +/* Copyright (c) 2014 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/hwdebug.hpp b/device/hwdebug.hpp index 08d830b13..b8bb1c5f1 100644 --- a/device/hwdebug.hpp +++ b/device/hwdebug.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2014-present Advanced Micro Devices, Inc. +/* Copyright (c) 2014 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/palappprofile.cpp b/device/pal/palappprofile.cpp index 8bd5e88cb..771cd3b16 100644 --- a/device/pal/palappprofile.cpp +++ b/device/pal/palappprofile.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/palappprofile.hpp b/device/pal/palappprofile.hpp index b10dad5c9..66fb3fe7d 100644 --- a/device/pal/palappprofile.hpp +++ b/device/pal/palappprofile.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/palblit.cpp b/device/pal/palblit.cpp index a89076e70..ad64ced84 100644 --- a/device/pal/palblit.cpp +++ b/device/pal/palblit.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/palblit.hpp b/device/pal/palblit.hpp index 571ce8020..2a0bdd272 100644 --- a/device/pal/palblit.hpp +++ b/device/pal/palblit.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/palconstbuf.cpp b/device/pal/palconstbuf.cpp index 8204efb3a..7acb7ee0a 100644 --- a/device/pal/palconstbuf.cpp +++ b/device/pal/palconstbuf.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2010-present Advanced Micro Devices, Inc. +/* Copyright (c) 2010 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/palconstbuf.hpp b/device/pal/palconstbuf.hpp index c42ddae95..7acb8674c 100644 --- a/device/pal/palconstbuf.hpp +++ b/device/pal/palconstbuf.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/palcounters.cpp b/device/pal/palcounters.cpp index b57446054..3da038efc 100644 --- a/device/pal/palcounters.cpp +++ b/device/pal/palcounters.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/palcounters.hpp b/device/pal/palcounters.hpp index d3adc3b6c..b332f7bde 100644 --- a/device/pal/palcounters.hpp +++ b/device/pal/palcounters.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/paldebugger.hpp b/device/pal/paldebugger.hpp index 29e964837..33cf9dee7 100644 --- a/device/pal/paldebugger.hpp +++ b/device/pal/paldebugger.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/paldebugmanager.cpp b/device/pal/paldebugmanager.cpp index 11419842a..2d0f137c3 100644 --- a/device/pal/paldebugmanager.cpp +++ b/device/pal/paldebugmanager.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/paldebugmanager.hpp b/device/pal/paldebugmanager.hpp index 6c800997a..1c85a045b 100644 --- a/device/pal/paldebugmanager.hpp +++ b/device/pal/paldebugmanager.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/paldefs.hpp b/device/pal/paldefs.hpp index 55a71c50a..19e15b325 100644 --- a/device/pal/paldefs.hpp +++ b/device/pal/paldefs.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/paldevice.cpp b/device/pal/paldevice.cpp index f2d7e7af2..b37b674d1 100644 --- a/device/pal/paldevice.cpp +++ b/device/pal/paldevice.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/paldevice.hpp b/device/pal/paldevice.hpp index 6f19ebebe..e9f1adf5d 100644 --- a/device/pal/paldevice.hpp +++ b/device/pal/paldevice.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/paldeviced3d10.cpp b/device/pal/paldeviced3d10.cpp index 5d391619b..a454c27c7 100644 --- a/device/pal/paldeviced3d10.cpp +++ b/device/pal/paldeviced3d10.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/paldeviced3d11.cpp b/device/pal/paldeviced3d11.cpp index e74292c83..f9fa1e537 100644 --- a/device/pal/paldeviced3d11.cpp +++ b/device/pal/paldeviced3d11.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/paldeviced3d9.cpp b/device/pal/paldeviced3d9.cpp index cc243082b..dd96a44f5 100644 --- a/device/pal/paldeviced3d9.cpp +++ b/device/pal/paldeviced3d9.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/paldevicegl.cpp b/device/pal/paldevicegl.cpp index fe031aaa9..e8a95d0e6 100644 --- a/device/pal/paldevicegl.cpp +++ b/device/pal/paldevicegl.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/palgpuopen.cpp b/device/pal/palgpuopen.cpp index 40b4e6f1e..3862878b2 100644 --- a/device/pal/palgpuopen.cpp +++ b/device/pal/palgpuopen.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2016-present Advanced Micro Devices, Inc. +/* Copyright (c) 2016 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/palgpuopen.hpp b/device/pal/palgpuopen.hpp index 5c8d1f6e7..9705f6747 100644 --- a/device/pal/palgpuopen.hpp +++ b/device/pal/palgpuopen.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2016-present Advanced Micro Devices, Inc. +/* Copyright (c) 2016 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/palkernel.cpp b/device/pal/palkernel.cpp index bf7090a48..e4d6dbdfc 100644 --- a/device/pal/palkernel.cpp +++ b/device/pal/palkernel.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/palkernel.hpp b/device/pal/palkernel.hpp index 3c262f638..5a05bfef6 100644 --- a/device/pal/palkernel.hpp +++ b/device/pal/palkernel.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/palmemory.cpp b/device/pal/palmemory.cpp index 9774d2501..8a5940d1a 100644 --- a/device/pal/palmemory.cpp +++ b/device/pal/palmemory.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/palmemory.hpp b/device/pal/palmemory.hpp index 53f169c7c..bc49c0f65 100644 --- a/device/pal/palmemory.hpp +++ b/device/pal/palmemory.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/palprintf.cpp b/device/pal/palprintf.cpp index acdc6dbda..61f1fead0 100644 --- a/device/pal/palprintf.cpp +++ b/device/pal/palprintf.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/palprintf.hpp b/device/pal/palprintf.hpp index bf3d6ef04..5c7b899c4 100644 --- a/device/pal/palprintf.hpp +++ b/device/pal/palprintf.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/palprogram.cpp b/device/pal/palprogram.cpp index a97b3530f..3cc213466 100644 --- a/device/pal/palprogram.cpp +++ b/device/pal/palprogram.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/palprogram.hpp b/device/pal/palprogram.hpp index dd2a42990..69a8ce6fb 100644 --- a/device/pal/palprogram.hpp +++ b/device/pal/palprogram.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/palresource.cpp b/device/pal/palresource.cpp index 99e8d1c34..c084c0d15 100644 --- a/device/pal/palresource.cpp +++ b/device/pal/palresource.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/palresource.hpp b/device/pal/palresource.hpp index bf3cf0196..e6da7725d 100644 --- a/device/pal/palresource.hpp +++ b/device/pal/palresource.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/palsched.hpp b/device/pal/palsched.hpp index 4f14f41a5..abcab0ded 100644 --- a/device/pal/palsched.hpp +++ b/device/pal/palsched.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/palschedcl.cpp b/device/pal/palschedcl.cpp index 672144d8e..344ee02d8 100644 --- a/device/pal/palschedcl.cpp +++ b/device/pal/palschedcl.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/palsettings.cpp b/device/pal/palsettings.cpp index 315b63eb6..01997fd30 100644 --- a/device/pal/palsettings.cpp +++ b/device/pal/palsettings.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/palsettings.hpp b/device/pal/palsettings.hpp index 4c871d57f..c682dc3d3 100644 --- a/device/pal/palsettings.hpp +++ b/device/pal/palsettings.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/palsignal.cpp b/device/pal/palsignal.cpp index 53a9cefc8..4f7ac33cc 100644 --- a/device/pal/palsignal.cpp +++ b/device/pal/palsignal.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2021-present Advanced Micro Devices, Inc. +/* Copyright (c) 2021 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/palsignal.hpp b/device/pal/palsignal.hpp index b02a43b1f..d043c6a92 100644 --- a/device/pal/palsignal.hpp +++ b/device/pal/palsignal.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2021-present Advanced Micro Devices, Inc. +/* Copyright (c) 2021 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/palthreadtrace.cpp b/device/pal/palthreadtrace.cpp index d4cda4781..aee42e1a0 100644 --- a/device/pal/palthreadtrace.cpp +++ b/device/pal/palthreadtrace.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/palthreadtrace.hpp b/device/pal/palthreadtrace.hpp index 27680b974..e0b71c8f6 100644 --- a/device/pal/palthreadtrace.hpp +++ b/device/pal/palthreadtrace.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/paltimestamp.cpp b/device/pal/paltimestamp.cpp index e9726afcc..37af64b8c 100644 --- a/device/pal/paltimestamp.cpp +++ b/device/pal/paltimestamp.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/paltimestamp.hpp b/device/pal/paltimestamp.hpp index add6bd5f8..183051389 100644 --- a/device/pal/paltimestamp.hpp +++ b/device/pal/paltimestamp.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/paltrap.hpp b/device/pal/paltrap.hpp index e62cfb59b..783ec5fe4 100644 --- a/device/pal/paltrap.hpp +++ b/device/pal/paltrap.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/palvirtual.cpp b/device/pal/palvirtual.cpp index 4ddd805f8..ea6d87a51 100644 --- a/device/pal/palvirtual.cpp +++ b/device/pal/palvirtual.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/pal/palvirtual.hpp b/device/pal/palvirtual.hpp index 82098b1ad..87538a2d4 100644 --- a/device/pal/palvirtual.hpp +++ b/device/pal/palvirtual.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/rocm/mesa_glinterop.h b/device/rocm/mesa_glinterop.h index bf3003d01..cf6669e48 100644 --- a/device/rocm/mesa_glinterop.h +++ b/device/rocm/mesa_glinterop.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2016-present Advanced Micro Devices, Inc. +/* Copyright (c) 2016 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/rocm/pro/lnxheaders.h b/device/rocm/pro/lnxheaders.h index afa4e894e..8c9fbe141 100644 --- a/device/rocm/pro/lnxheaders.h +++ b/device/rocm/pro/lnxheaders.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2017-present Advanced Micro Devices, Inc. +/* Copyright (c) 2017 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/rocm/pro/prodevice.cpp b/device/rocm/pro/prodevice.cpp index ad963100e..465735c8c 100644 --- a/device/rocm/pro/prodevice.cpp +++ b/device/rocm/pro/prodevice.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2017-present Advanced Micro Devices, Inc. +/* Copyright (c) 2017 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/rocm/pro/prodevice.hpp b/device/rocm/pro/prodevice.hpp index 34cc83938..80ff3600c 100644 --- a/device/rocm/pro/prodevice.hpp +++ b/device/rocm/pro/prodevice.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2017-present Advanced Micro Devices, Inc. +/* Copyright (c) 2017 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/rocm/pro/prodriver.hpp b/device/rocm/pro/prodriver.hpp index 1467e88f2..bc4206974 100644 --- a/device/rocm/pro/prodriver.hpp +++ b/device/rocm/pro/prodriver.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2017-present Advanced Micro Devices, Inc. +/* Copyright (c) 2017 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/rocm/pro/profuncs.hpp b/device/rocm/pro/profuncs.hpp index 864b167f6..e878df0c9 100644 --- a/device/rocm/pro/profuncs.hpp +++ b/device/rocm/pro/profuncs.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2017-present Advanced Micro Devices, Inc. +/* Copyright (c) 2017 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/rocm/rocappprofile.cpp b/device/rocm/rocappprofile.cpp index 26579aa29..0ac1b42b9 100644 --- a/device/rocm/rocappprofile.cpp +++ b/device/rocm/rocappprofile.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2009-present Advanced Micro Devices, Inc. +/* Copyright (c) 2009 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/rocm/rocappprofile.hpp b/device/rocm/rocappprofile.hpp index ab62fa9f2..67643f30e 100644 --- a/device/rocm/rocappprofile.hpp +++ b/device/rocm/rocappprofile.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2009-present Advanced Micro Devices, Inc. +/* Copyright (c) 2009 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/rocm/rocblit.cpp b/device/rocm/rocblit.cpp index 2f2f54f92..c89da0cf3 100644 --- a/device/rocm/rocblit.cpp +++ b/device/rocm/rocblit.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/rocm/rocblit.hpp b/device/rocm/rocblit.hpp index 24959350e..5c14bee2c 100644 --- a/device/rocm/rocblit.hpp +++ b/device/rocm/rocblit.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-present Advanced Micro Devices, Inc. +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/rocm/roccounters.cpp b/device/rocm/roccounters.cpp index 0cac41dda..6a976d547 100644 --- a/device/rocm/roccounters.cpp +++ b/device/rocm/roccounters.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2017-present Advanced Micro Devices, Inc. +/* Copyright (c) 2017 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/rocm/roccounters.hpp b/device/rocm/roccounters.hpp index 5719cde66..ffc9fad09 100644 --- a/device/rocm/roccounters.hpp +++ b/device/rocm/roccounters.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2017-present Advanced Micro Devices, Inc. +/* Copyright (c) 2017 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/rocm/rocdefs.hpp b/device/rocm/rocdefs.hpp index 8e9712fb6..b93156a52 100644 --- a/device/rocm/rocdefs.hpp +++ b/device/rocm/rocdefs.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/rocm/rocdevice.cpp b/device/rocm/rocdevice.cpp index 0bb98ec59..17905799a 100644 --- a/device/rocm/rocdevice.cpp +++ b/device/rocm/rocdevice.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/rocm/rocdevice.hpp b/device/rocm/rocdevice.hpp index b408a9d71..572d817c6 100644 --- a/device/rocm/rocdevice.hpp +++ b/device/rocm/rocdevice.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2009-present Advanced Micro Devices, Inc. +/* Copyright (c) 2009 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/rocm/rocglinterop.cpp b/device/rocm/rocglinterop.cpp index 9d81e4495..02754c259 100644 --- a/device/rocm/rocglinterop.cpp +++ b/device/rocm/rocglinterop.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2016-present Advanced Micro Devices, Inc. +/* Copyright (c) 2016 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/rocm/rocglinterop.hpp b/device/rocm/rocglinterop.hpp index 88102d0e5..a539cc6fc 100644 --- a/device/rocm/rocglinterop.hpp +++ b/device/rocm/rocglinterop.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2016-present Advanced Micro Devices, Inc. +/* Copyright (c) 2016 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/rocm/rockernel.cpp b/device/rocm/rockernel.cpp index e9caf329d..d46c9adef 100644 --- a/device/rocm/rockernel.cpp +++ b/device/rocm/rockernel.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2009-present Advanced Micro Devices, Inc. +/* Copyright (c) 2009 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/rocm/rockernel.hpp b/device/rocm/rockernel.hpp index c6cedd83d..9f52e059a 100644 --- a/device/rocm/rockernel.hpp +++ b/device/rocm/rockernel.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2009-present Advanced Micro Devices, Inc. +/* Copyright (c) 2009 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/rocm/rocmemory.cpp b/device/rocm/rocmemory.cpp index 998066118..5a35d8503 100644 --- a/device/rocm/rocmemory.cpp +++ b/device/rocm/rocmemory.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/rocm/rocmemory.hpp b/device/rocm/rocmemory.hpp index 6f9468e41..4eaa57619 100644 --- a/device/rocm/rocmemory.hpp +++ b/device/rocm/rocmemory.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2016-present Advanced Micro Devices, Inc. +/* Copyright (c) 2016 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/rocm/rocprintf.cpp b/device/rocm/rocprintf.cpp index 83088a753..a7740e17a 100644 --- a/device/rocm/rocprintf.cpp +++ b/device/rocm/rocprintf.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2010-present Advanced Micro Devices, Inc. +/* Copyright (c) 2010 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/rocm/rocprintf.hpp b/device/rocm/rocprintf.hpp index 7a87aa86d..2945ee835 100644 --- a/device/rocm/rocprintf.hpp +++ b/device/rocm/rocprintf.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2010-present Advanced Micro Devices, Inc. +/* Copyright (c) 2010 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/rocm/rocprogram.cpp b/device/rocm/rocprogram.cpp index f791e449b..1959f33a9 100644 --- a/device/rocm/rocprogram.cpp +++ b/device/rocm/rocprogram.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/rocm/rocprogram.hpp b/device/rocm/rocprogram.hpp index 0776df689..4c0b7fbdc 100644 --- a/device/rocm/rocprogram.hpp +++ b/device/rocm/rocprogram.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/rocm/rocregisters.hpp b/device/rocm/rocregisters.hpp index 721f06eee..4fac38e85 100644 --- a/device/rocm/rocregisters.hpp +++ b/device/rocm/rocregisters.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2016-present Advanced Micro Devices, Inc. +/* Copyright (c) 2016 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/rocm/rocsched.hpp b/device/rocm/rocsched.hpp index da7b2c841..bf431bd1a 100644 --- a/device/rocm/rocsched.hpp +++ b/device/rocm/rocsched.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2018-present Advanced Micro Devices, Inc. +/* Copyright (c) 2018 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/rocm/rocschedcl.cpp b/device/rocm/rocschedcl.cpp index f06d30055..051c28a51 100644 --- a/device/rocm/rocschedcl.cpp +++ b/device/rocm/rocschedcl.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2018-present Advanced Micro Devices, Inc. +/* Copyright (c) 2018 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/rocm/rocsettings.cpp b/device/rocm/rocsettings.cpp index 62cbc3579..a1845caa9 100644 --- a/device/rocm/rocsettings.cpp +++ b/device/rocm/rocsettings.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2010-present Advanced Micro Devices, Inc. +/* Copyright (c) 2010 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/rocm/rocsettings.hpp b/device/rocm/rocsettings.hpp index 34baa1ec4..a7107ce86 100644 --- a/device/rocm/rocsettings.hpp +++ b/device/rocm/rocsettings.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2010-present Advanced Micro Devices, Inc. +/* Copyright (c) 2010 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/rocm/rocsignal.cpp b/device/rocm/rocsignal.cpp index 013f4f3e5..d6a7b68eb 100644 --- a/device/rocm/rocsignal.cpp +++ b/device/rocm/rocsignal.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2021-present Advanced Micro Devices, Inc. +/* Copyright (c) 2021 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/rocm/rocsignal.hpp b/device/rocm/rocsignal.hpp index 2350c14e2..3e8c1e6e6 100644 --- a/device/rocm/rocsignal.hpp +++ b/device/rocm/rocsignal.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2021-present Advanced Micro Devices, Inc. +/* Copyright (c) 2021 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/rocm/rocurilocator.cpp b/device/rocm/rocurilocator.cpp index acc01e75c..157f07d99 100644 --- a/device/rocm/rocurilocator.cpp +++ b/device/rocm/rocurilocator.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2021-present Advanced Micro Devices, Inc. +/* Copyright (c) 2021 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/rocm/rocurilocator.hpp b/device/rocm/rocurilocator.hpp index 8331b479c..880b6c72f 100644 --- a/device/rocm/rocurilocator.hpp +++ b/device/rocm/rocurilocator.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2019-present Advanced Micro Devices, Inc. +/* Copyright (c) 2019 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/rocm/rocvirtual.cpp b/device/rocm/rocvirtual.cpp index 2e62b0336..0433435a5 100644 --- a/device/rocm/rocvirtual.cpp +++ b/device/rocm/rocvirtual.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2013-present Advanced Micro Devices, Inc. +/* Copyright (c) 2013 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/device/rocm/rocvirtual.hpp b/device/rocm/rocvirtual.hpp index 277289731..85f938370 100644 --- a/device/rocm/rocvirtual.hpp +++ b/device/rocm/rocvirtual.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/elf/elf.cpp b/elf/elf.cpp index 0e08feed0..3dfe41989 100644 --- a/elf/elf.cpp +++ b/elf/elf.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2010-present Advanced Micro Devices, Inc. +/* Copyright (c) 2010 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/elf/elf.hpp b/elf/elf.hpp index 15268f108..14220ad4b 100644 --- a/elf/elf.hpp +++ b/elf/elf.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/elf/elfio/elf_types.hpp b/elf/elfio/elf_types.hpp index 966b3a116..5a286e09c 100644 --- a/elf/elfio/elf_types.hpp +++ b/elf/elfio/elf_types.hpp @@ -1,6 +1,6 @@ /* Copyright (C) 2001-2015 by Serge Lamikhov-Center -Modifications Copyright (C) 2020-2021 Advanced Micro Devices, Inc. +Modifications Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/elf/elfio/elfio.hpp b/elf/elfio/elfio.hpp index 3ed1fb0e8..2985f79e1 100644 --- a/elf/elfio/elfio.hpp +++ b/elf/elfio/elfio.hpp @@ -1,6 +1,6 @@ /* Copyright (C) 2001-2015 by Serge Lamikhov-Center -Modifications Copyright (C) 2020-2021 Advanced Micro Devices, Inc. +Modifications Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/elf/elfio/elfio_dump.hpp b/elf/elfio/elfio_dump.hpp index c70bf538b..86d70b58b 100644 --- a/elf/elfio/elfio_dump.hpp +++ b/elf/elfio/elfio_dump.hpp @@ -1,6 +1,6 @@ /* Copyright (C) 2001-2015 by Serge Lamikhov-Center -Modifications Copyright (C) 2020-2021 Advanced Micro Devices, Inc. +Modifications Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/elf/elfio/elfio_dynamic.hpp b/elf/elfio/elfio_dynamic.hpp index 87cf62d55..9350c84ed 100644 --- a/elf/elfio/elfio_dynamic.hpp +++ b/elf/elfio/elfio_dynamic.hpp @@ -1,6 +1,6 @@ /* Copyright (C) 2001-2015 by Serge Lamikhov-Center -Modifications Copyright (C) 2020-2021 Advanced Micro Devices, Inc. +Modifications Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/elf/elfio/elfio_header.hpp b/elf/elfio/elfio_header.hpp index f061fecc8..c1697b22f 100644 --- a/elf/elfio/elfio_header.hpp +++ b/elf/elfio/elfio_header.hpp @@ -1,6 +1,6 @@ /* Copyright (C) 2001-2015 by Serge Lamikhov-Center -Modifications Copyright (C) 2020-2021 Advanced Micro Devices, Inc. +Modifications Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/elf/elfio/elfio_note.hpp b/elf/elfio/elfio_note.hpp index d054906f1..725294eaa 100644 --- a/elf/elfio/elfio_note.hpp +++ b/elf/elfio/elfio_note.hpp @@ -1,6 +1,6 @@ /* Copyright (C) 2001-2015 by Serge Lamikhov-Center -Modifications Copyright (C) 2020-2021 Advanced Micro Devices, Inc. +Modifications Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/elf/elfio/elfio_relocation.hpp b/elf/elfio/elfio_relocation.hpp index b6aa16016..18804b8d7 100644 --- a/elf/elfio/elfio_relocation.hpp +++ b/elf/elfio/elfio_relocation.hpp @@ -1,6 +1,6 @@ /* Copyright (C) 2001-2015 by Serge Lamikhov-Center -Modifications Copyright (C) 2020-2021 Advanced Micro Devices, Inc. +Modifications Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/elf/elfio/elfio_section.hpp b/elf/elfio/elfio_section.hpp index 22e0f5791..21ca4bc24 100644 --- a/elf/elfio/elfio_section.hpp +++ b/elf/elfio/elfio_section.hpp @@ -1,6 +1,6 @@ /* Copyright (C) 2001-2015 by Serge Lamikhov-Center -Modifications Copyright (C) 2020-2021 Advanced Micro Devices, Inc. +Modifications Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/elf/elfio/elfio_segment.hpp b/elf/elfio/elfio_segment.hpp index 3de7e2278..f35412497 100644 --- a/elf/elfio/elfio_segment.hpp +++ b/elf/elfio/elfio_segment.hpp @@ -1,6 +1,6 @@ /* Copyright (C) 2001-2015 by Serge Lamikhov-Center -Modifications Copyright (C) 2020-2021 Advanced Micro Devices, Inc. +Modifications Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/elf/elfio/elfio_strings.hpp b/elf/elfio/elfio_strings.hpp index cda5bd7c4..88a45e5d7 100644 --- a/elf/elfio/elfio_strings.hpp +++ b/elf/elfio/elfio_strings.hpp @@ -1,6 +1,6 @@ /* Copyright (C) 2001-2015 by Serge Lamikhov-Center -Modifications Copyright (C) 2020-2021 Advanced Micro Devices, Inc. +Modifications Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/elf/elfio/elfio_symbols.hpp b/elf/elfio/elfio_symbols.hpp index 65482d30b..96ad16d3b 100644 --- a/elf/elfio/elfio_symbols.hpp +++ b/elf/elfio/elfio_symbols.hpp @@ -1,6 +1,6 @@ /* Copyright (C) 2001-2015 by Serge Lamikhov-Center -Modifications Copyright (C) 2020-2021 Advanced Micro Devices, Inc. +Modifications Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/elf/elfio/elfio_utils.hpp b/elf/elfio/elfio_utils.hpp index 6dd980c3c..6482da2dc 100644 --- a/elf/elfio/elfio_utils.hpp +++ b/elf/elfio/elfio_utils.hpp @@ -1,6 +1,6 @@ /* Copyright (C) 2001-2015 by Serge Lamikhov-Center -Modifications Copyright (C) 2020-2021 Advanced Micro Devices, Inc. +Modifications Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/elf/test/CMakeLists.txt b/elf/test/CMakeLists.txt index 8ed8d020b..8326176b0 100644 --- a/elf/test/CMakeLists.txt +++ b/elf/test/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (C) 2020-2021 Advanced Micro Devices, Inc. All Rights Reserved. +# Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All Rights Reserved. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/elf/test/main.cpp b/elf/test/main.cpp index fed06c6a5..bc4d5f625 100644 --- a/elf/test/main.cpp +++ b/elf/test/main.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2020-2021 Advanced Micro Devices, Inc. All Rights Reserved. +/* Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All Rights Reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/include/top.hpp b/include/top.hpp index 531f92664..f51db7d1e 100644 --- a/include/top.hpp +++ b/include/top.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/include/vdi_agent_amd.h b/include/vdi_agent_amd.h index 91d8250db..ffc86697e 100644 --- a/include/vdi_agent_amd.h +++ b/include/vdi_agent_amd.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2010-present Advanced Micro Devices, Inc. +/* Copyright (c) 2010 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/include/vdi_common.hpp b/include/vdi_common.hpp index feb73288e..c4dbf023a 100644 --- a/include/vdi_common.hpp +++ b/include/vdi_common.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2020-present Advanced Micro Devices, Inc. +/* Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/os/alloc.cpp b/os/alloc.cpp index 6caaea437..ac4f590ff 100644 --- a/os/alloc.cpp +++ b/os/alloc.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/os/alloc.hpp b/os/alloc.hpp index d03930af4..7c1729080 100644 --- a/os/alloc.hpp +++ b/os/alloc.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/os/os.cpp b/os/os.cpp index d9cd1d35c..f5ea547dd 100644 --- a/os/os.cpp +++ b/os/os.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/os/os.hpp b/os/os.hpp index 811bc0691..923b0f450 100644 --- a/os/os.hpp +++ b/os/os.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/os/os_posix.cpp b/os/os_posix.cpp index 0868dbc15..04b2f71a7 100644 --- a/os/os_posix.cpp +++ b/os/os_posix.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/os/os_win32.cpp b/os/os_win32.cpp index 6b4f6aac3..499aa694b 100644 --- a/os/os_win32.cpp +++ b/os/os_win32.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/os/setjmp.S b/os/setjmp.S index 56424147c..f88a64131 100644 --- a/os/setjmp.S +++ b/os/setjmp.S @@ -1,4 +1,4 @@ - # Copyright (c) 2008-present Advanced Micro Devices, Inc. 
+ # Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. #Permission is hereby granted, free of charge, to any person obtaining a copy #of this software and associated documentation files (the "Software"), to deal diff --git a/os/setjmp.asm b/os/setjmp.asm index a6e16f8e2..4fcb60eab 100644 --- a/os/setjmp.asm +++ b/os/setjmp.asm @@ -1,4 +1,4 @@ - ; Copyright (c) 2008-present Advanced Micro Devices, Inc. + ; Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. ;Permission is hereby granted, free of charge, to any person obtaining a copy ;of this software and associated documentation files (the "Software"), to deal diff --git a/platform/activity.cpp b/platform/activity.cpp index 34a9f6b23..2e19ef273 100644 --- a/platform/activity.cpp +++ b/platform/activity.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2019-present Advanced Micro Devices, Inc. +/* Copyright (c) 2019 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/platform/activity.hpp b/platform/activity.hpp index 5c28fb396..bcda9e93e 100644 --- a/platform/activity.hpp +++ b/platform/activity.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2019-present Advanced Micro Devices, Inc. +/* Copyright (c) 2019 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/platform/agent.cpp b/platform/agent.cpp index 46bce0b61..cc7b107a1 100644 --- a/platform/agent.cpp +++ b/platform/agent.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2010-present Advanced Micro Devices, Inc. +/* Copyright (c) 2010 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/platform/agent.hpp b/platform/agent.hpp index 77c046d5d..ed4b8f2ea 100644 --- a/platform/agent.hpp +++ b/platform/agent.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2010-present Advanced Micro Devices, Inc. +/* Copyright (c) 2010 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/platform/command.cpp b/platform/command.cpp index 19c29cce6..9cc8b4cce 100644 --- a/platform/command.cpp +++ b/platform/command.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/platform/command.hpp b/platform/command.hpp index 02945b19a..487b7a98d 100644 --- a/platform/command.hpp +++ b/platform/command.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2010-present Advanced Micro Devices, Inc. +/* Copyright (c) 2010 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/platform/command_utils.hpp b/platform/command_utils.hpp index 9a8b812f0..388f20e4e 100644 --- a/platform/command_utils.hpp +++ b/platform/command_utils.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2021-present Advanced Micro Devices, Inc. +/* Copyright (c) 2021 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/platform/commandqueue.cpp b/platform/commandqueue.cpp index 4f552e4ab..4de1a0aca 100644 --- a/platform/commandqueue.cpp +++ b/platform/commandqueue.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2012-present Advanced Micro Devices, Inc. +/* Copyright (c) 2012 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/platform/commandqueue.hpp b/platform/commandqueue.hpp index a0e013c27..e61a45b94 100644 --- a/platform/commandqueue.hpp +++ b/platform/commandqueue.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2012-present Advanced Micro Devices, Inc. +/* Copyright (c) 2012 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/platform/context.cpp b/platform/context.cpp index 01d7206d6..2901f7c9b 100644 --- a/platform/context.cpp +++ b/platform/context.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/platform/context.hpp b/platform/context.hpp index e8bcacb1e..904d92e0a 100644 --- a/platform/context.hpp +++ b/platform/context.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/platform/counter.hpp b/platform/counter.hpp index c12cc6610..425d1314f 100644 --- a/platform/counter.hpp +++ b/platform/counter.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/platform/interop.hpp b/platform/interop.hpp index aac1e3cbc..4080f98b1 100644 --- a/platform/interop.hpp +++ b/platform/interop.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/platform/kernel.cpp b/platform/kernel.cpp index 55d2bd1b8..a716f2d7d 100644 --- a/platform/kernel.cpp +++ b/platform/kernel.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/platform/kernel.hpp b/platform/kernel.hpp index fdc58e14f..305eab180 100644 --- a/platform/kernel.hpp +++ b/platform/kernel.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/platform/memory.cpp b/platform/memory.cpp index b1b3ff122..20eef2bbf 100644 --- a/platform/memory.cpp +++ b/platform/memory.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2010-present Advanced Micro Devices, Inc. +/* Copyright (c) 2010 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/platform/memory.hpp b/platform/memory.hpp index 4943d6a7d..4edd7e282 100644 --- a/platform/memory.hpp +++ b/platform/memory.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2010-present Advanced Micro Devices, Inc. +/* Copyright (c) 2010 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/platform/ndrange.cpp b/platform/ndrange.cpp index 92c97a59b..162d1249e 100644 --- a/platform/ndrange.cpp +++ b/platform/ndrange.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/platform/ndrange.hpp b/platform/ndrange.hpp index 0f48f4641..14a52ef35 100644 --- a/platform/ndrange.hpp +++ b/platform/ndrange.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/platform/object.hpp b/platform/object.hpp index f55768383..9fca95ab2 100644 --- a/platform/object.hpp +++ b/platform/object.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/platform/perfctr.hpp b/platform/perfctr.hpp index fc32e76c2..c1c9545f4 100644 --- a/platform/perfctr.hpp +++ b/platform/perfctr.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/platform/prof_protocol.h b/platform/prof_protocol.h index 0ab1f0fe5..ae23fa5ab 100644 --- a/platform/prof_protocol.h +++ b/platform/prof_protocol.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2018-present Advanced Micro Devices, Inc. +/* Copyright (c) 2018 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/platform/program.cpp b/platform/program.cpp index 071e5beee..98900e8a1 100644 --- a/platform/program.cpp +++ b/platform/program.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/platform/program.hpp b/platform/program.hpp index ba6b88fac..f32d403dd 100644 --- a/platform/program.hpp +++ b/platform/program.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/platform/runtime.cpp b/platform/runtime.cpp index fa7345f4f..63dfa37de 100644 --- a/platform/runtime.cpp +++ b/platform/runtime.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/platform/runtime.hpp b/platform/runtime.hpp index 3c5a765ce..60ab44e1d 100644 --- a/platform/runtime.hpp +++ b/platform/runtime.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/platform/sampler.hpp b/platform/sampler.hpp index 1cb969334..cd8b35f57 100644 --- a/platform/sampler.hpp +++ b/platform/sampler.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/platform/threadtrace.hpp b/platform/threadtrace.hpp index b4af590f9..fbfbbb904 100644 --- a/platform/threadtrace.hpp +++ b/platform/threadtrace.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/thread/monitor.cpp b/thread/monitor.cpp index 198873241..d7d4e2300 100644 --- a/thread/monitor.cpp +++ b/thread/monitor.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/thread/monitor.hpp b/thread/monitor.hpp index 717d1ffc3..9b5f73d4f 100644 --- a/thread/monitor.hpp +++ b/thread/monitor.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/thread/semaphore.cpp b/thread/semaphore.cpp index 795d8f5a0..78f8addb0 100644 --- a/thread/semaphore.cpp +++ b/thread/semaphore.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/thread/semaphore.hpp b/thread/semaphore.hpp index 155439044..f2fa81e1f 100644 --- a/thread/semaphore.hpp +++ b/thread/semaphore.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/thread/thread.cpp b/thread/thread.cpp index feb257659..c7a6c35b9 100644 --- a/thread/thread.cpp +++ b/thread/thread.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/thread/thread.hpp b/thread/thread.hpp index 790bbfb35..ad88f7ffa 100644 --- a/thread/thread.hpp +++ b/thread/thread.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/utils/concurrent.hpp b/utils/concurrent.hpp index 681486440..203212dbb 100644 --- a/utils/concurrent.hpp +++ b/utils/concurrent.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2010-present Advanced Micro Devices, Inc. +/* Copyright (c) 2010 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/utils/debug.cpp b/utils/debug.cpp index 21c329341..6a43a0d94 100644 --- a/utils/debug.cpp +++ b/utils/debug.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/utils/debug.hpp b/utils/debug.hpp index d23a2a988..055aa1c4d 100644 --- a/utils/debug.hpp +++ b/utils/debug.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/utils/flags.cpp b/utils/flags.cpp index 237006258..bb4361742 100644 --- a/utils/flags.cpp +++ b/utils/flags.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/utils/flags.hpp b/utils/flags.hpp index d7a338203..9c826e1cc 100644 --- a/utils/flags.hpp +++ b/utils/flags.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2009-present Advanced Micro Devices, Inc. +/* Copyright (c) 2009 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/utils/macros.hpp b/utils/macros.hpp index b7d9f7c8b..02fef7599 100644 --- a/utils/macros.hpp +++ b/utils/macros.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2011-present Advanced Micro Devices, Inc. +/* Copyright (c) 2011 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/utils/util.hpp b/utils/util.hpp index 0c4753937..1e69ea431 100644 --- a/utils/util.hpp +++ b/utils/util.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-present Advanced Micro Devices, Inc. +/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/utils/versions.hpp b/utils/versions.hpp index 90d68c0be..2e6554f35 100644 --- a/utils/versions.hpp +++ b/utils/versions.hpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2010-present Advanced Micro Devices, Inc. +/* Copyright (c) 2010 - 2021 Advanced Micro Devices, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal From 678a33b9754e204de3ab35126218f26c3c64e402 Mon Sep 17 00:00:00 2001 From: Chauncey Hui Date: Thu, 8 Jul 2021 03:00:09 -0400 Subject: [PATCH 054/102] SWDEV-2 - Change OpenCL version number from 3333 to 3334 --- utils/versions.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/versions.hpp b/utils/versions.hpp index 2e6554f35..fb306ce9c 100644 --- a/utils/versions.hpp +++ b/utils/versions.hpp @@ -28,7 +28,7 @@ #endif // AMD_PLATFORM_NAME #ifndef AMD_PLATFORM_BUILD_NUMBER -#define AMD_PLATFORM_BUILD_NUMBER 3333 +#define AMD_PLATFORM_BUILD_NUMBER 3334 #endif // AMD_PLATFORM_BUILD_NUMBER #ifndef AMD_PLATFORM_REVISION_NUMBER From 3dc344c465ebb813e65fbaffd95baafb45b2345a Mon Sep 17 00:00:00 2001 From: Vladislav Sytchenko Date: Fri, 9 Jul 2021 11:24:17 -0400 Subject: [PATCH 055/102] SWDEV-273235 - Restore missing LinuxPro option Change-Id: Iccc0206f3f2c5b3c03f6ed313537f0f364a4ca29 --- cmake/ROCclr.cmake | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/cmake/ROCclr.cmake b/cmake/ROCclr.cmake index 4b4c27351..9b1fced62 100644 --- a/cmake/ROCclr.cmake +++ b/cmake/ROCclr.cmake @@ -91,6 +91,13 @@ if(WIN32) target_compile_definitions(rocclr PUBLIC ATI_OS_WIN) else() target_compile_definitions(rocclr PUBLIC ATI_OS_LINUX) + + # Additional settings for LinuxPro + option(BUILD_LINUXPRO "Build LinuxPro" OFF) + if(BUILD_LINUXPRO) + target_compile_definitions(rocclr PUBLIC + ROCCLR_DISABLE_PREVEGA ROCCLR_ENABLE_GL_SHARING) + endif() endif() target_compile_definitions(rocclr PUBLIC From bd6a2c50946027e7ca388cfde8887ba4c3e3d750 Mon Sep 17 00:00:00 2001 From: Chauncey Hui Date: Sat, 10 Jul 2021 03:00:06 -0400 Subject: [PATCH 056/102] SWDEV-2 - Change OpenCL version number from 3334 to 3335 --- utils/versions.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/versions.hpp 
b/utils/versions.hpp index fb306ce9c..fdf4cb6c0 100644 --- a/utils/versions.hpp +++ b/utils/versions.hpp @@ -28,7 +28,7 @@ #endif // AMD_PLATFORM_NAME #ifndef AMD_PLATFORM_BUILD_NUMBER -#define AMD_PLATFORM_BUILD_NUMBER 3334 +#define AMD_PLATFORM_BUILD_NUMBER 3335 #endif // AMD_PLATFORM_BUILD_NUMBER #ifndef AMD_PLATFORM_REVISION_NUMBER From f9d6df34b41ebee8609fa550e6784fad435c11a4 Mon Sep 17 00:00:00 2001 From: German Andryeyev Date: Fri, 9 Jul 2021 18:19:44 -0400 Subject: [PATCH 057/102] SWDEV-292820 - Add a new notify lock HSA signal calback may occur during the actual marker submit. That may cause a deadlock, because shared lock_ object. Create the new notify_lock_ field to protect the notification. Change-Id: I9752af84e59895530620fac3932c6fc276de8658 --- platform/command.cpp | 2 +- platform/command.hpp | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/platform/command.cpp b/platform/command.cpp index 9cc8b4cce..4c53d55c5 100644 --- a/platform/command.cpp +++ b/platform/command.cpp @@ -265,7 +265,7 @@ bool Event::awaitCompletion() { bool Event::notifyCmdQueue() { HostQueue* queue = command().queue(); if (AMD_DIRECT_DISPATCH) { - ScopedLock l(lock_); + ScopedLock l(notify_lock_); if ((status() > CL_COMPLETE) && (nullptr != queue) && // If HW event was assigned, then notification can be ignored, since a barrier was issued (HwEvent() == nullptr) && diff --git a/platform/command.hpp b/platform/command.hpp index 487b7a98d..36e71360a 100644 --- a/platform/command.hpp +++ b/platform/command.hpp @@ -89,6 +89,7 @@ class Event : public RuntimeObject { private: Monitor lock_; + Monitor notify_lock_; //!< Lock used for notification with direct dispatch only std::atomic callbacks_; //!< linked list of callback entries. std::atomic status_; //!< current execution status. 
From d9ef43f5315fb8e0528e26acd9e66bb6781c714e Mon Sep 17 00:00:00 2001 From: Vladislav Sytchenko Date: Fri, 9 Jul 2021 15:57:10 -0400 Subject: [PATCH 058/102] SWDEV-273235 - Remove dependency on ADL Currently ADL is only being included if we build HSAIL and not LC. The only configuration that does this is Ellesmere on Linux. As the appprofile logic doesn't do anything right now, it should be safe to remove it. Change-Id: I13ce8c27af08c25b95edcbfd2ee1ce447b0bfd39 --- device/appprofile.cpp | 175 ------------------------------------------ 1 file changed, 175 deletions(-) diff --git a/device/appprofile.cpp b/device/appprofile.cpp index 63fae78c9..23255ecde 100644 --- a/device/appprofile.cpp +++ b/device/appprofile.cpp @@ -22,133 +22,19 @@ #include "os/os.hpp" #include "utils/flags.hpp" #include "appprofile.hpp" -#if !defined(WITH_LIGHTNING_COMPILER) -#include "adl.h" -#endif // !defined(WITH_LIGHTNING_COMPILER) #include #include -#if defined(WITH_LIGHTNING_COMPILER) typedef void* ADLApplicationProfile; int SearchProfileOfAnApplication(const wchar_t* fileName, ADLApplicationProfile** lppProfile) { return 0; } -#define __stdcall -#endif // defined(WITH_LIGHTNING_COMPILER) - -#ifdef BRAHMA -extern int SearchProfileOfAnApplication(const wchar_t* fileName, - ADLApplicationProfile** lppProfile); -#endif // BRAHMA #define GETPROCADDRESS(_adltype_, _adlfunc_) (_adltype_) amd::Os::getSymbol(adlHandle_, #_adlfunc_); namespace amd { -#if !defined(BRAHMA) && !defined(WITH_LIGHTNING_COMPILER) - -static void* __stdcall adlMallocCallback(int n) { return malloc(n); } - -class ADL { - public: - ADL(); - ~ADL(); - - bool init(); - - void* adlHandle() const { return adlHandle_; }; - ADL_CONTEXT_HANDLE adlContext() const { return adlContext_; } - - typedef int (*Adl2MainControlCreate)(ADL_MAIN_MALLOC_CALLBACK callback, - int iEnumConnectedAdapters, ADL_CONTEXT_HANDLE* context); - typedef int (*Adl2MainControlDestroy)(ADL_CONTEXT_HANDLE context); - typedef int 
(*Adl2ConsoleModeFileDescriptorSet)(ADL_CONTEXT_HANDLE context, int fileDescriptor); - typedef int (*Adl2MainControlRefresh)(ADL_CONTEXT_HANDLE context); - typedef int (*Adl2ApplicationProfilesSystemReload)(ADL_CONTEXT_HANDLE context); - typedef int (*Adl2ApplicationProfilesProfileOfApplicationx2Search)( - ADL_CONTEXT_HANDLE context, const wchar_t* fileName, const wchar_t* path, - const wchar_t* version, const wchar_t* appProfileArea, ADLApplicationProfile** lppProfile); - - Adl2MainControlCreate adl2MainControlCreate; - Adl2MainControlDestroy adl2MainControlDestroy; - Adl2ConsoleModeFileDescriptorSet adl2ConsoleModeFileDescriptorSet; - Adl2MainControlRefresh adl2MainControlRefresh; - Adl2ApplicationProfilesSystemReload adl2ApplicationProfilesSystemReload; - Adl2ApplicationProfilesProfileOfApplicationx2Search - adl2ApplicationProfilesProfileOfApplicationx2Search; - - private: - void* adlHandle_; - ADL_CONTEXT_HANDLE adlContext_; -}; - -ADL::ADL() : adlHandle_(NULL), adlContext_(NULL) { - adl2MainControlCreate = NULL; - adl2MainControlDestroy = NULL; - adl2ConsoleModeFileDescriptorSet = NULL; - adl2MainControlRefresh = NULL; - adl2ApplicationProfilesSystemReload = NULL; - adl2ApplicationProfilesProfileOfApplicationx2Search = NULL; -} - -ADL::~ADL() { - if (adl2MainControlDestroy != NULL) { - adl2MainControlDestroy(adlContext_); - } - adlContext_ = NULL; -} - -bool ADL::init() { - if (!adlHandle_) { - adlHandle_ = amd::Os::loadLibrary("atiadl" LP64_SWITCH(LINUX_SWITCH("xx", "xy"), "xx")); - } - - if (!adlHandle_) { - return false; - } - - adl2MainControlCreate = GETPROCADDRESS(Adl2MainControlCreate, ADL2_Main_Control_Create); - adl2MainControlDestroy = GETPROCADDRESS(Adl2MainControlDestroy, ADL2_Main_Control_Destroy); - adl2ConsoleModeFileDescriptorSet = - GETPROCADDRESS(Adl2ConsoleModeFileDescriptorSet, ADL2_ConsoleMode_FileDescriptor_Set); - adl2MainControlRefresh = GETPROCADDRESS(Adl2MainControlRefresh, ADL2_Main_Control_Refresh); - 
adl2ApplicationProfilesSystemReload = - GETPROCADDRESS(Adl2ApplicationProfilesSystemReload, ADL2_ApplicationProfiles_System_Reload); - adl2ApplicationProfilesProfileOfApplicationx2Search = - GETPROCADDRESS(Adl2ApplicationProfilesProfileOfApplicationx2Search, - ADL2_ApplicationProfiles_ProfileOfAnApplicationX2_Search); - - if (adl2MainControlCreate == NULL || adl2MainControlDestroy == NULL || - adl2MainControlRefresh == NULL || adl2ApplicationProfilesSystemReload == NULL || - adl2ApplicationProfilesProfileOfApplicationx2Search == NULL) { - return false; - } - - int result = adl2MainControlCreate(adlMallocCallback, 1, &adlContext_); - if (result != ADL_OK) { - // ADL2 is expected to return ADL_ERR_NO_XDISPLAY in Linux Console mode environment - if (result == ADL_ERR_NO_XDISPLAY) { - if (adl2ConsoleModeFileDescriptorSet == NULL || - adl2ConsoleModeFileDescriptorSet(adlContext_, ADL_UNSET) != ADL_OK) { - return false; - } - adl2MainControlRefresh(adlContext_); - } else { - return false; - } - } - - // Reload is disabled in ADL with the change list 1198904 and ticket - // SWDEV-59442 - The ADL_ApplicationProfiles_System_Reload Function is not Re-entrant - // Returned value is ADL_ERR_NOT_SUPPORTED on Windows. 
- adl2ApplicationProfilesSystemReload(adlContext_); - - return true; -} - -#endif // BRAHMA - AppProfile::AppProfile() : gpuvmHighAddr_(false), profileOverridesAllSettings_(false) { amd::Os::getAppPathAndFileName(appFileName_, appPathAndFileName_); propertyDataMap_.insert( @@ -193,75 +79,14 @@ bool AppProfile::init() { bool AppProfile::ParseApplicationProfile() { ADLApplicationProfile* pProfile = NULL; -#if !defined(BRAHMA) && !defined(WITH_LIGHTNING_COMPILER) - amd::ADL* adl = new amd::ADL; - - if ((adl == NULL) || !adl->init()) { - delete adl; - return false; - } - - // Apply blb configurations - int result = adl->adl2ApplicationProfilesProfileOfApplicationx2Search( - adl->adlContext(), wsAppFileName_.c_str(), NULL, NULL, L"OCL", &pProfile); - - delete adl; - -#else // BRAHMA - if (!SearchProfileOfAnApplication(wsAppFileName_.c_str(), &pProfile)) { return false; } -#endif // BRAHMA - if (pProfile == NULL) { return false; } -#if !defined(WITH_LIGHTNING_COMPILER) - PropertyRecord* firstProperty = pProfile->record; - uint32_t valueOffset = 0; - const int BUFSIZE = 1024; - wchar_t wbuffer[BUFSIZE]; - char buffer[2 * BUFSIZE]; - - for (int index = 0; index < pProfile->iCount; index++) { - PropertyRecord* profileProperty = - reinterpret_cast((reinterpret_cast(firstProperty)) + valueOffset); - - // Get property name - char* propertyName = profileProperty->strName; - auto entry = propertyDataMap_.find(std::string(propertyName)); - if (entry == propertyDataMap_.end()) { - // unexpected name - valueOffset += (sizeof(PropertyRecord) + profileProperty->iDataSize - 4); - continue; - } - - // Get the property value - switch (entry->second.type_) { - case DataType_Boolean: - *(reinterpret_cast(entry->second.data_)) = profileProperty->uData[0] ? 
true : false; - break; - case DataType_String: { - assert((size_t)(profileProperty->iDataSize) < sizeof(wbuffer) - 2 && - "app profile string too long"); - memset(wbuffer, 0, sizeof(wbuffer)); - memcpy(wbuffer, profileProperty->uData, profileProperty->iDataSize); - size_t len = wcstombs(buffer, wbuffer, sizeof(buffer)); - assert(len < sizeof(buffer) - 1 && "app profile string too long"); - *(reinterpret_cast(entry->second.data_)) = buffer; - break; - } - default: - break; - } - valueOffset += (sizeof(PropertyRecord) + profileProperty->iDataSize - 4); - } - - free(pProfile); -#endif // !defined(WITH_LIGHTNING_COMPILER) return true; } } From 764f9161815c60b0110016ccd7805ee39842f995 Mon Sep 17 00:00:00 2001 From: Chauncey Hui Date: Tue, 13 Jul 2021 03:00:06 -0400 Subject: [PATCH 059/102] SWDEV-2 - Change OpenCL version number from 3335 to 3336 --- utils/versions.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/versions.hpp b/utils/versions.hpp index fdf4cb6c0..7253e41c2 100644 --- a/utils/versions.hpp +++ b/utils/versions.hpp @@ -28,7 +28,7 @@ #endif // AMD_PLATFORM_NAME #ifndef AMD_PLATFORM_BUILD_NUMBER -#define AMD_PLATFORM_BUILD_NUMBER 3335 +#define AMD_PLATFORM_BUILD_NUMBER 3336 #endif // AMD_PLATFORM_BUILD_NUMBER #ifndef AMD_PLATFORM_REVISION_NUMBER From d7a7aa6593bd37201ff12c0a9a4190efe32f82c9 Mon Sep 17 00:00:00 2001 From: Sourabh Betigeri Date: Mon, 12 Jul 2021 23:38:17 -0700 Subject: [PATCH 060/102] SWDEV-290685 - Dispatch a barrier packet with acquire system scope to ensure ordering before a hipStreamWrite() Change-Id: I8853ad86a6634d55a98173ca3f79d93b85c08f85 --- device/rocm/rocvirtual.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/device/rocm/rocvirtual.cpp b/device/rocm/rocvirtual.cpp index 0433435a5..624a807d4 100644 --- a/device/rocm/rocvirtual.cpp +++ b/device/rocm/rocvirtual.cpp @@ -2276,6 +2276,9 @@ void VirtualGPU::submitStreamOperation(amd::StreamOperationCommand& cmd) { amd::Coord3D size(sizeBytes); bool entire 
= amdMemory->isEntirelyCovered(origin, size); + // Ensure memory ordering preceding the write + dispatchBarrierPacket(kBarrierPacketAcquireHeader); + // Use GPU Blit to write bool result = blitMgr().fillBuffer(*memory, &value, sizeBytes, origin, size, entire, true); ClPrint(amd::LOG_DEBUG, amd::LOG_COPY, "Writting value: 0x%lx", value); From e5100cfafee6184bf6b732d8b8ea995a493b0be6 Mon Sep 17 00:00:00 2001 From: Chauncey Hui Date: Wed, 14 Jul 2021 03:00:08 -0400 Subject: [PATCH 061/102] SWDEV-2 - Change OpenCL version number from 3336 to 3337 --- utils/versions.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/versions.hpp b/utils/versions.hpp index 7253e41c2..3af0f7458 100644 --- a/utils/versions.hpp +++ b/utils/versions.hpp @@ -28,7 +28,7 @@ #endif // AMD_PLATFORM_NAME #ifndef AMD_PLATFORM_BUILD_NUMBER -#define AMD_PLATFORM_BUILD_NUMBER 3336 +#define AMD_PLATFORM_BUILD_NUMBER 3337 #endif // AMD_PLATFORM_BUILD_NUMBER #ifndef AMD_PLATFORM_REVISION_NUMBER From 177776b9e37256d794ddb30678253988df1e91f4 Mon Sep 17 00:00:00 2001 From: German Andryeyev Date: Tue, 13 Jul 2021 12:36:49 -0400 Subject: [PATCH 062/102] SWDEV-290495 - Add HSA_AMD_AGENT_INFO_SVM_DIRECT_HOST_ACCESS query Change-Id: Ib18a53a9016eb5c5ffd51bf6835cba7299ec8421 --- device/rocm/rocdevice.cpp | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/device/rocm/rocdevice.cpp b/device/rocm/rocdevice.cpp index 17905799a..be27ae96d 100644 --- a/device/rocm/rocdevice.cpp +++ b/device/rocm/rocdevice.cpp @@ -1081,6 +1081,7 @@ Memory* Device::getGpuMemory(amd::Memory* mem) const { return static_cast(mem->getDeviceMemory(*this)); } +// ================================================================================================ bool Device::populateOCLDeviceConstants() { info_.available_ = true; @@ -1572,14 +1573,23 @@ bool Device::populateOCLDeviceConstants() { &info_.hmmCpuMemoryAccessible_)) { LogError("HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT 
query failed."); } - LogPrintfInfo("HMM support: %d, xnack: %d\n", - info_.hmmSupported_, info_.hmmCpuMemoryAccessible_); + + // HMM specific capability for CPU direct access to device memory + if (HSA_STATUS_SUCCESS != hsa_agent_get_info(_bkendDevice, + static_cast(HSA_AMD_AGENT_INFO_SVM_DIRECT_HOST_ACCESS), + &info_.hmmDirectHostAccess_)) { + LogError("HSA_AMD_AGENT_INFO_SVM_DIRECT_HOST_ACCESS query failed."); + } + + LogPrintfInfo("HMM support: %d, xnack: %d, direct host access: %d\n", + info_.hmmSupported_, info_.hmmCpuMemoryAccessible_, info_.hmmDirectHostAccess_); info_.globalCUMask_ = {}; return true; } +// ================================================================================================ device::VirtualDevice* Device::createVirtualDevice(amd::CommandQueue* queue) { amd::ScopedLock lock(vgpusAccess()); @@ -2266,7 +2276,10 @@ bool Device::SetSvmAttributesInt(const void* dev_ptr, size_t count, //! @note: HMM should support automatic page table update with xnack enabled, //! but currently it doesn't and runtime explicitly enables access from all devices for (const auto dev : devices()) { - attr.push_back({attrib, static_cast(dev)->getBackendDevice().handle}); + // Skip null devices + if (static_cast(dev)->getBackendDevice().handle != 0) { + attr.push_back({attrib, static_cast(dev)->getBackendDevice().handle}); + } } } else { attr.push_back({attrib, getBackendDevice().handle}); @@ -2292,7 +2305,7 @@ bool Device::SetSvmAttributesInt(const void* dev_ptr, size_t count, hsa_status_t status = hsa_amd_svm_attributes_set(const_cast(dev_ptr), count, attr.data(), attr.size()); if (status != HSA_STATUS_SUCCESS) { - LogPrintfError("hsa_amd_svm_attributes_set() failed. Advice: %d", advice); + LogPrintfError("hsa_amd_svm_attributes_set() failed. 
Advice: %d, status: %d", advice, status); return false; } } else { From b2806439c729c65d8db22340cbd0dc63b5e054fa Mon Sep 17 00:00:00 2001 From: kjayapra-amd Date: Wed, 14 Jul 2021 17:35:26 -0400 Subject: [PATCH 063/102] SWDEV-295144 - Typecast arena mem ptr(0x2) to void*, otherwise results in wrong constructor overload. Change-Id: I433b70dc70377ae0c5f9b29818703e1ac9d95053 --- platform/memory.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/platform/memory.hpp b/platform/memory.hpp index 4edd7e282..1b842dee3 100644 --- a/platform/memory.hpp +++ b/platform/memory.hpp @@ -650,7 +650,8 @@ class LiquidFlashFile : public RuntimeObject { class ArenaMemory: public Buffer { public: ArenaMemory(Context& context) - : Buffer(context, 0, std::numeric_limits::max(), kArenaMemoryPtr) {} + : Buffer(context, 0, std::numeric_limits::max(), + reinterpret_cast(kArenaMemoryPtr)) {} }; } // namespace amd From e0e379ac142bad7887376253831371c59d4c19f1 Mon Sep 17 00:00:00 2001 From: Chauncey Hui Date: Thu, 15 Jul 2021 03:00:05 -0400 Subject: [PATCH 064/102] SWDEV-2 - Change OpenCL version number from 3337 to 3338 --- utils/versions.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/versions.hpp b/utils/versions.hpp index 3af0f7458..1daeaffdc 100644 --- a/utils/versions.hpp +++ b/utils/versions.hpp @@ -28,7 +28,7 @@ #endif // AMD_PLATFORM_NAME #ifndef AMD_PLATFORM_BUILD_NUMBER -#define AMD_PLATFORM_BUILD_NUMBER 3337 +#define AMD_PLATFORM_BUILD_NUMBER 3338 #endif // AMD_PLATFORM_BUILD_NUMBER #ifndef AMD_PLATFORM_REVISION_NUMBER From 94c6696ebe1c775561eaebe2216c2c9f83f85580 Mon Sep 17 00:00:00 2001 From: Vladislav Sytchenko Date: Wed, 14 Jul 2021 19:26:42 -0400 Subject: [PATCH 065/102] SWDEV-273235 - [PAL] Ignore system headers on Linux CMake always searches system paths for specified files before resorting to the hints we give it. We should not be using elf and GL headers from /usr/include. 
Change-Id: I2172ce3da9cc101ced63e22d95ecc031b94f0a67 --- cmake/FindAMD_HSA_LOADER.cmake | 3 ++- cmake/FindAMD_UGL.cmake | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/cmake/FindAMD_HSA_LOADER.cmake b/cmake/FindAMD_HSA_LOADER.cmake index 05c685462..0413ea327 100644 --- a/cmake/FindAMD_HSA_LOADER.cmake +++ b/cmake/FindAMD_HSA_LOADER.cmake @@ -28,7 +28,8 @@ find_path(AMD_LIBELF_INCLUDE_DIR libelf.h PATHS ${CMAKE_SOURCE_DIR}/hsail-compiler/lib/loaders/elf/utils/libelf ${CMAKE_SOURCE_DIR}/../hsail-compiler/lib/loaders/elf/utils/libelf - ${CMAKE_SOURCE_DIR}/../../hsail-compiler/lib/loaders/elf/utils/libelf) + ${CMAKE_SOURCE_DIR}/../../hsail-compiler/lib/loaders/elf/utils/libelf + NO_DEFAULT_PATH) find_path(AMD_HSAIL_INCLUDE_DIR hsa.h HINTS diff --git a/cmake/FindAMD_UGL.cmake b/cmake/FindAMD_UGL.cmake index 8548fec54..1246476e9 100644 --- a/cmake/FindAMD_UGL.cmake +++ b/cmake/FindAMD_UGL.cmake @@ -35,7 +35,8 @@ find_path(AMD_UGL_INCLUDE_DIR GL/glx.h ${CMAKE_SOURCE_DIR}/../drivers/drivers ${CMAKE_SOURCE_DIR}/../../drivers/drivers PATH_SUFFIXES - ugl/inc) + ugl/inc + NO_DEFAULT_PATH) include(FindPackageHandleStandardArgs) find_package_handle_standard_args(AMD_UGL From 9fbe7c0e947cff77126d7570568175237303fb83 Mon Sep 17 00:00:00 2001 From: Sourabh Betigeri Date: Wed, 14 Jul 2021 13:46:18 -0700 Subject: [PATCH 066/102] SWDEV-290685 - Relacing release fence instead of acquire for hipStreamWrite() Change-Id: Ic2946b68c427d3e058948c0813863a27c21b903d --- device/rocm/rocvirtual.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/device/rocm/rocvirtual.cpp b/device/rocm/rocvirtual.cpp index 624a807d4..07147600f 100644 --- a/device/rocm/rocvirtual.cpp +++ b/device/rocm/rocvirtual.cpp @@ -2277,7 +2277,7 @@ void VirtualGPU::submitStreamOperation(amd::StreamOperationCommand& cmd) { bool entire = amdMemory->isEntirelyCovered(origin, size); // Ensure memory ordering preceding the write - dispatchBarrierPacket(kBarrierPacketAcquireHeader); + 
dispatchBarrierPacket(kBarrierPacketReleaseHeader); // Use GPU Blit to write bool result = blitMgr().fillBuffer(*memory, &value, sizeBytes, origin, size, entire, true); From 2cdadae6cbaaa2183dac5d0d7f32c6acc045e5b1 Mon Sep 17 00:00:00 2001 From: German Andryeyev Date: Wed, 14 Jul 2021 19:56:39 -0400 Subject: [PATCH 067/102] SWDEV-292018 - Switch to internal signals for markers Add ref counting to ProfilingSignal class to track the last release. If a signal was used in the marker, then don't reuse it, but create a new one for internal usage. Don't rely on HSA callback for the command status update if there are no pending dispatches. Change-Id: I19f14ed9d80acfe79993b343b2187635f8428a20 --- device/rocm/rocdevice.cpp | 36 ++++------- device/rocm/rocdevice.hpp | 7 ++- device/rocm/rocvirtual.cpp | 120 ++++++++++++++++++++----------------- device/rocm/rocvirtual.hpp | 4 +- platform/command.cpp | 11 ++-- platform/command.hpp | 13 +++- 6 files changed, 98 insertions(+), 93 deletions(-) diff --git a/device/rocm/rocdevice.cpp b/device/rocm/rocdevice.cpp index be27ae96d..69ab3590d 100644 --- a/device/rocm/rocdevice.cpp +++ b/device/rocm/rocdevice.cpp @@ -3012,36 +3012,22 @@ amd::Memory* Device::GetArenaMemObj(const void* ptr, size_t& offset) { } // ================================================================================================ -ProfilingSignal* Device::GetGlobalSignal(Timestamp* ts) const { - std::unique_ptr prof_signal(new ProfilingSignal()); - if (prof_signal != nullptr) { - hsa_agent_t agent = getBackendDevice(); - hsa_agent_t* agents = (settings().system_scope_signal_) ? nullptr : &agent; - uint32_t num_agents = (settings().system_scope_signal_) ? 
0 : 1; - - if (ts != 0) { - // Save HSA signal earlier to make sure the possible callback will have a valid - // value for processing - prof_signal->ts_ = ts; - ts->AddProfilingSignal(prof_signal.get()); - } - - if (HSA_STATUS_SUCCESS == hsa_signal_create(kInitSignalValueOne, - num_agents, agents, &prof_signal->signal_)) { - return prof_signal.release(); - } +void Device::ReleaseGlobalSignal(void* signal) const { + if (signal != nullptr) { + reinterpret_cast(signal)->release(); } - return nullptr; } // ================================================================================================ -void Device::ReleaseGlobalSignal(void* signal) const { - if (signal != nullptr) { - ProfilingSignal* prof_signal = reinterpret_cast(signal); - if (prof_signal->signal_.handle != 0) { - hsa_signal_destroy(prof_signal->signal_); +ProfilingSignal::~ProfilingSignal() { + if (signal_.handle != 0) { + if (hsa_signal_load_relaxed(signal_) > 0) { + LogError("Runtime shouldn't destroy a signal that is still busy!"); + if (hsa_signal_wait_scacquire(signal_, HSA_SIGNAL_CONDITION_LT, kInitSignalValueOne, + kUnlimitedWait, HSA_WAIT_STATE_BLOCKED) != 0) { + } } - delete prof_signal; + hsa_signal_destroy(signal_); } } diff --git a/device/rocm/rocdevice.hpp b/device/rocm/rocdevice.hpp index 572d817c6..e8f479104 100644 --- a/device/rocm/rocdevice.hpp +++ b/device/rocm/rocdevice.hpp @@ -77,7 +77,8 @@ class VirtualDevice; class PrintfDbg; class IProDevice; -struct ProfilingSignal : public amd::HeapObject { +class ProfilingSignal : public amd::ReferenceCountedObject { +public: hsa_signal_t signal_; //!< HSA signal to track profiling information Timestamp* ts_; //!< Timestamp object associated with the signal HwQueueEngine engine_; //!< Engine used with this signal @@ -89,6 +90,8 @@ struct ProfilingSignal : public amd::HeapObject { , done_(true) , lock_("Signal Ops Lock", true) { signal_.handle = 0; } + + virtual ~ProfilingSignal(); amd::Monitor& LockSignalOps() { return lock_; } }; @@ 
-531,8 +534,6 @@ class Device : public NullDevice { virtual amd::Memory* GetArenaMemObj(const void* ptr, size_t& offset); - ProfilingSignal* GetGlobalSignal(Timestamp* ts) const; - private: bool create(); diff --git a/device/rocm/rocvirtual.cpp b/device/rocm/rocvirtual.cpp index 07147600f..96172b6af 100644 --- a/device/rocm/rocvirtual.cpp +++ b/device/rocm/rocvirtual.cpp @@ -319,10 +319,7 @@ void VirtualGPU::MemoryDependency::clear(bool all) { // ================================================================================================ VirtualGPU::HwQueueTracker::~HwQueueTracker() { for (auto& signal: signal_list_) { - if (signal->signal_.handle != 0) { - hsa_signal_destroy(signal->signal_); - } - delete signal; + signal->release(); } } @@ -374,6 +371,26 @@ hsa_signal_t VirtualGPU::HwQueueTracker::ActiveSignal( // a GPU waiter(which may be not triggered yet) and CPU signal reset below WaitNext(); + if (signal_list_[current_id_]->referenceCount() > 1) { + // The signal was assigned to the global marker's event, hence runtime can't reuse it + // and needs a new signal + std::unique_ptr signal(new ProfilingSignal()); + if (signal != nullptr) { + hsa_agent_t agent = gpu_.gpu_device(); + const Settings& settings = gpu_.dev().settings(); + hsa_agent_t* agents = (settings.system_scope_signal_) ? nullptr : &agent; + uint32_t num_agents = (settings.system_scope_signal_) ? 0 : 1; + + if (HSA_STATUS_SUCCESS == hsa_signal_create(0, num_agents, agents, &signal->signal_)) { + signal_list_[current_id_]->release(); + signal_list_[current_id_] = signal.release(); + } else { + assert(!"ProfilingSignal reallocaiton failed! Marker has a conflict with signal reuse!"); + } + } else { + assert(!"ProfilingSignal reallocaiton failed! 
Marker has a conflict with signal reuse!"); + } + } ProfilingSignal* prof_signal = signal_list_[current_id_]; // Reset the signal and return hsa_signal_silent_store_relaxed(prof_signal->signal_, init_val); @@ -387,7 +404,23 @@ hsa_signal_t VirtualGPU::HwQueueTracker::ActiveSignal( // If direct dispatch is enabled and the batch head isn't null, then it's a marker and // requires the batch update upon HSA signal completion if (AMD_DIRECT_DISPATCH && (ts->command().GetBatchHead() != nullptr)) { - assert(false && "Runtime should not have batch command in ActiveSignal!"); + uint32_t init_value = kInitSignalValueOne; + // If API callback is enabled, then use a blocking signal for AQL queue. + // HSA signal will be acquired in SW and released after HSA signal callback + if (ts->command().Callback() != nullptr) { + ts->SetCallbackSignal(prof_signal->signal_); + // Blocks AQL queue from further processing + hsa_signal_add_relaxed(prof_signal->signal_, 1); + init_value += 1; + } + hsa_status_t result = hsa_amd_signal_async_handler(prof_signal->signal_, + HSA_SIGNAL_CONDITION_LT, init_value, &HsaAmdSignalHandler, ts); + if (HSA_STATUS_SUCCESS != result) { + LogError("hsa_amd_signal_async_handler() failed to set the handler!"); + } else { + ClPrint(amd::LOG_INFO, amd::LOG_SIG, "Set Handler: handle(0x%lx), timestamp(%p)", + prof_signal->signal_.handle, prof_signal); + } } if (!sdma_profiling_) { hsa_amd_profiling_async_copy_enable(true); @@ -872,8 +905,7 @@ bool VirtualGPU::dispatchCounterAqlPacket(hsa_ext_amd_aql_pm4_packet_t* packet, } // ================================================================================================ -void VirtualGPU::dispatchBarrierPacket(uint16_t packetHeader, - bool skipSignal, const ProfilingSignal* global_signal) { +void VirtualGPU::dispatchBarrierPacket(uint16_t packetHeader, bool skipSignal) { const uint32_t queueSize = gpu_queue_->size; const uint32_t queueMask = queueSize - 1; @@ -896,16 +928,12 @@ void 
VirtualGPU::dispatchBarrierPacket(uint16_t packetHeader, barrier_packet_.completion_signal.handle = 0; if (!skipSignal) { - if (global_signal != nullptr) { - barrier_packet_.completion_signal = global_signal->signal_; - } else { - // Pool size must grow to the size of pending AQL packets - const uint32_t pool_size = index - read; + // Pool size must grow to the size of pending AQL packets + const uint32_t pool_size = index - read; - // Get active signal for current dispatch if profiling is necessary - barrier_packet_.completion_signal = - Barriers().ActiveSignal(kInitSignalValueOne, timestamp_, pool_size); - } + // Get active signal for current dispatch if profiling is necessary + barrier_packet_.completion_signal = + Barriers().ActiveSignal(kInitSignalValueOne, timestamp_, pool_size); } while ((index - hsa_queue_load_read_index_scacquire(gpu_queue_)) >= queueMask); @@ -1226,6 +1254,12 @@ void VirtualGPU::profilingEnd(amd::Command& command) { } command.setData(timestamp_); + // Update HW event only for batches + if ((AMD_DIRECT_DISPATCH) && (command.GetBatchHead() != nullptr)) { + timestamp_->Signals().back()->retain(); + command.SetHwEvent(timestamp_->Signals().back()); + } + timestamp_ = nullptr; } } @@ -2889,7 +2923,7 @@ void VirtualGPU::submitKernel(amd::NDRangeKernelCommand& vcmd) { queue->profilingEnd(vcmd); } else { - // Make sure VirtualGPU has an exclusive access to the resources + // Make sure VirtualGPU has an exclusive access to the resources amd::ScopedLock lock(execution()); profilingBegin(vcmd); @@ -2913,47 +2947,23 @@ void VirtualGPU::submitNativeFn(amd::NativeFnCommand& cmd) { // ================================================================================================ void VirtualGPU::submitMarker(amd::Marker& vcmd) { if (AMD_DIRECT_DISPATCH || vcmd.profilingInfo().marker_ts_) { - profilingBegin(vcmd); - if (timestamp_ != nullptr) { - ProfilingSignal* prof_signal = nullptr; - // If direct dispatch is enabled and the batch head isn't null, 
then it's a marker and - // requires the batch update upon HSA signal completion - if (AMD_DIRECT_DISPATCH) { - assert(vcmd.GetBatchHead() != nullptr && "Marker doesn't have batch!"); - - prof_signal = dev().GetGlobalSignal(timestamp_); - prof_signal->done_ = false; - - assert(prof_signal != nullptr && "Failed to allocate the global HSA signal!"); - uint32_t init_value = kInitSignalValueOne; - // If API callback is enabled, then use a blocking signal for AQL queue. - // HSA signal will be acquired in SW and released after HSA signal callback - if (vcmd.Callback() != nullptr) { - timestamp_->SetCallbackSignal(prof_signal->signal_); - // Blocks AQL queue from further processing - hsa_signal_add_relaxed(prof_signal->signal_, 1); - init_value += 1; - } + // Make sure VirtualGPU has an exclusive access to the resources + amd::ScopedLock lock(execution()); + if (vcmd.CpuWaitRequested() && hasPendingDispatch_ == false) { + // It should be safe to call flush directly if there are not pending dispatches without + // HSA signal callback + flush(vcmd.GetBatchHead()); + } else { + profilingBegin(vcmd); + if (timestamp_ != nullptr) { + // Submit a barrier with a cache flushes. + dispatchBarrierPacket(kBarrierPacketHeader, false); - hsa_status_t result = hsa_amd_signal_async_handler(prof_signal->signal_, - HSA_SIGNAL_CONDITION_LT, init_value, &HsaAmdSignalHandler, timestamp_); - if (HSA_STATUS_SUCCESS != result) { - LogError("hsa_amd_signal_async_handler() failed to set the handler!"); - } else { - ClPrint(amd::LOG_INFO, amd::LOG_SIG, "Set Handler: handle(0x%lx), timestamp(%p)", - prof_signal->signal_.handle, prof_signal); - } - // Update HW event only for batches - vcmd.SetHwEvent(timestamp_->Signals().back()); + hasPendingDispatch_ = false; } - // Submit a barrier with a cache flushes. 
- dispatchBarrierPacket(kBarrierPacketHeader, false, prof_signal); - - // Don't reset the flag for direct dispatch, because the global signals are out of scope - // for internal barrier tracking and SDMA could lose a wait for compute - hasPendingDispatch_ = AMD_DIRECT_DISPATCH; + profilingEnd(vcmd); } - profilingEnd(vcmd); + } } diff --git a/device/rocm/rocvirtual.hpp b/device/rocm/rocvirtual.hpp index 85f938370..f980358f2 100644 --- a/device/rocm/rocvirtual.hpp +++ b/device/rocm/rocvirtual.hpp @@ -223,6 +223,7 @@ class VirtualGPU : public device::VirtualDevice { //! Update current active engine void SetActiveEngine(HwQueueEngine engine = HwQueueEngine::Compute) { engine_ = engine; } + HwQueueEngine GetActiveEngine() const { return engine_; } //! Returns the last submitted signal for a wait std::vector& WaitingSignal(HwQueueEngine engine = HwQueueEngine::Compute); @@ -385,8 +386,7 @@ class VirtualGPU : public device::VirtualDevice { template bool dispatchGenericAqlPacket(AqlPacket* packet, uint16_t header, uint16_t rest, bool blocking, size_t size = 1); - void dispatchBarrierPacket(uint16_t packetHeader, bool skipSignal = false, - const ProfilingSignal* global_signal = nullptr); + void dispatchBarrierPacket(uint16_t packetHeader, bool skipSignal = false); bool dispatchCounterAqlPacket(hsa_ext_amd_aql_pm4_packet_t* packet, const uint32_t gfxVersion, bool blocking, const hsa_ven_amd_aqlprofile_1_00_pfn_t* extApi); void dispatchBarrierValuePacket(const hsa_amd_barrier_value_packet_t* packet, diff --git a/platform/command.cpp b/platform/command.cpp index 4c53d55c5..f4567f417 100644 --- a/platform/command.cpp +++ b/platform/command.cpp @@ -232,11 +232,12 @@ void Event::processCallbacks(int32_t status) const { } } +static constexpr bool kCpuWait = true; // ================================================================================================ bool Event::awaitCompletion() { if (status() > CL_COMPLETE) { - // Notifies current command queue about waiting - if 
(!notifyCmdQueue()) { + // Notifies the current command queue about waiting + if (!notifyCmdQueue(kCpuWait)) { return false; } @@ -262,7 +263,7 @@ bool Event::awaitCompletion() { } // ================================================================================================ -bool Event::notifyCmdQueue() { +bool Event::notifyCmdQueue(bool cpu_wait) { HostQueue* queue = command().queue(); if (AMD_DIRECT_DISPATCH) { ScopedLock l(notify_lock_); @@ -271,7 +272,7 @@ bool Event::notifyCmdQueue() { (HwEvent() == nullptr) && !notified_.test_and_set()) { // Make sure the queue is draining the enqueued commands. - amd::Command* command = new amd::Marker(*queue, false, nullWaitList, this); + amd::Command* command = new amd::Marker(*queue, false, nullWaitList, this, cpu_wait); if (command == NULL) { notified_.clear(); return false; @@ -341,7 +342,7 @@ void Command::enqueue() { // Notify all commands about the waiter. Barrier will be sent in order to obtain // HSA signal for a wait on the current queue std::for_each(eventWaitList().begin(), eventWaitList().end(), - std::mem_fun(&Command::notifyCmdQueue)); + std::bind2nd(std::mem_fun(&Command::notifyCmdQueue), !kCpuWait)); // The batch update must be lock protected to avoid a race condition // when multiple threads submit/flush/update the batch at the same time diff --git a/platform/command.hpp b/platform/command.hpp index 36e71360a..7282a28c1 100644 --- a/platform/command.hpp +++ b/platform/command.hpp @@ -207,7 +207,7 @@ class Event : public RuntimeObject { /*! \brief Notifies current command queue about execution status */ - bool notifyCmdQueue(); + bool notifyCmdQueue(bool cpu_wait = false); //! RTTI internal implementation virtual ObjectType objectType() const { return ObjectTypeEvent; } @@ -998,15 +998,22 @@ class ExternalSemaphoreCmd : public Command { class Marker : public Command { + private: + bool cpu_wait_; //!< If true, then the marker was issued for CPU/GPU sync + public: //! 
Create a new Marker Marker(HostQueue& queue, bool userVisible, const EventWaitList& eventWaitList = nullWaitList, - const Event* waitingEvent = nullptr) - : Command(queue, userVisible ? CL_COMMAND_MARKER : 0, eventWaitList, 0, waitingEvent) {} + const Event* waitingEvent = nullptr, bool cpu_wait = false) + : Command(queue, userVisible ? CL_COMMAND_MARKER : 0, eventWaitList, 0, waitingEvent) + , cpu_wait_(cpu_wait) {} //! The actual command implementation. virtual void submit(device::VirtualDevice& device) { device.submitMarker(*this); } + //! Check if this marker requires CPU wait + bool CpuWaitRequested() const { return cpu_wait_; } + }; /*! \brief Maps CL objects created from external ones and syncs the contents (blocking). From e62e229af75967c58fd0ab07636e099079dda0f0 Mon Sep 17 00:00:00 2001 From: Chauncey Hui Date: Fri, 16 Jul 2021 03:00:07 -0400 Subject: [PATCH 068/102] SWDEV-2 - Change OpenCL version number from 3338 to 3339 --- utils/versions.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/versions.hpp b/utils/versions.hpp index 1daeaffdc..43e343c09 100644 --- a/utils/versions.hpp +++ b/utils/versions.hpp @@ -28,7 +28,7 @@ #endif // AMD_PLATFORM_NAME #ifndef AMD_PLATFORM_BUILD_NUMBER -#define AMD_PLATFORM_BUILD_NUMBER 3338 +#define AMD_PLATFORM_BUILD_NUMBER 3339 #endif // AMD_PLATFORM_BUILD_NUMBER #ifndef AMD_PLATFORM_REVISION_NUMBER From 6324e3c3cd197338442023f1b96f06dbaf64c27f Mon Sep 17 00:00:00 2001 From: German Andryeyev Date: Fri, 16 Jul 2021 18:20:51 -0400 Subject: [PATCH 069/102] SWDEV-290384 - Disable HSA callback for any host wait Change-Id: Ie876deb62859f5551f4ed69eb8187ac3fa35f42a --- device/rocm/rocvirtual.cpp | 15 ++++++--------- device/rocm/rocvirtual.hpp | 6 +++--- platform/command.hpp | 15 ++++++--------- 3 files changed, 15 insertions(+), 21 deletions(-) diff --git a/device/rocm/rocvirtual.cpp b/device/rocm/rocvirtual.cpp index 96172b6af..1c0773bf5 100644 --- a/device/rocm/rocvirtual.cpp +++ 
b/device/rocm/rocvirtual.cpp @@ -403,7 +403,8 @@ hsa_signal_t VirtualGPU::HwQueueTracker::ActiveSignal( ts->AddProfilingSignal(prof_signal); // If direct dispatch is enabled and the batch head isn't null, then it's a marker and // requires the batch update upon HSA signal completion - if (AMD_DIRECT_DISPATCH && (ts->command().GetBatchHead() != nullptr)) { + if (AMD_DIRECT_DISPATCH && (ts->command().GetBatchHead() != nullptr) && + !ts->command().CpuWaitRequested()) { uint32_t init_value = kInitSignalValueOne; // If API callback is enabled, then use a blocking signal for AQL queue. // HSA signal will be acquired in SW and released after HSA signal callback @@ -421,6 +422,9 @@ hsa_signal_t VirtualGPU::HwQueueTracker::ActiveSignal( ClPrint(amd::LOG_INFO, amd::LOG_SIG, "Set Handler: handle(0x%lx), timestamp(%p)", prof_signal->signal_.handle, prof_signal); } + // Update the current command/marker with HW event + prof_signal->retain(); + ts->command().SetHwEvent(prof_signal); } if (!sdma_profiling_) { hsa_amd_profiling_async_copy_enable(true); @@ -1253,13 +1257,6 @@ void VirtualGPU::profilingEnd(amd::Command& command) { timestamp_->end(); } command.setData(timestamp_); - - // Update HW event only for batches - if ((AMD_DIRECT_DISPATCH) && (command.GetBatchHead() != nullptr)) { - timestamp_->Signals().back()->retain(); - command.SetHwEvent(timestamp_->Signals().back()); - } - timestamp_ = nullptr; } } @@ -2949,7 +2946,7 @@ void VirtualGPU::submitMarker(amd::Marker& vcmd) { if (AMD_DIRECT_DISPATCH || vcmd.profilingInfo().marker_ts_) { // Make sure VirtualGPU has an exclusive access to the resources amd::ScopedLock lock(execution()); - if (vcmd.CpuWaitRequested() && hasPendingDispatch_ == false) { + if (vcmd.CpuWaitRequested()) { // It should be safe to call flush directly if there are not pending dispatches without // HSA signal callback flush(vcmd.GetBatchHead()); diff --git a/device/rocm/rocvirtual.hpp b/device/rocm/rocvirtual.hpp index f980358f2..f5c8666a9 100644 --- 
a/device/rocm/rocvirtual.hpp +++ b/device/rocm/rocvirtual.hpp @@ -89,7 +89,7 @@ class Timestamp : public amd::HeapObject { uint64_t start_; uint64_t end_; VirtualGPU* gpu_; //!< Virtual GPU, associated with this timestamp - const amd::Command& command_; //!< Command, associated with this timestamp + amd::Command& command_; ///!< Command, associated with this timestamp amd::Command* parsedCommand_; //!< Command down the list, considering command_ as head std::vector signals_; //!< The list of all signals, associated with the TS hsa_signal_t callback_signal_; //!< Signal associated with a callback for possible later update @@ -98,7 +98,7 @@ class Timestamp : public amd::HeapObject { Timestamp& operator=(const Timestamp&) = delete; public: - Timestamp(VirtualGPU* gpu, const amd::Command& command) + Timestamp(VirtualGPU* gpu, amd::Command& command) : start_(std::numeric_limits::max()) , end_(0) , gpu_(gpu) @@ -144,7 +144,7 @@ class Timestamp : public amd::HeapObject { static double getGpuTicksToTime() { return ticksToTime_; } //! Returns amd::command assigned to this timestamp - const amd::Command& command() const { return command_; } + amd::Command& command() const { return command_; } //! Sets the parsed command void setParsedCommand(amd::Command* command) { parsedCommand_ = command; } diff --git a/platform/command.hpp b/platform/command.hpp index 7282a28c1..91a412f4e 100644 --- a/platform/command.hpp +++ b/platform/command.hpp @@ -242,6 +242,8 @@ class Command : public Event { const Event* waitingEvent_; //!< Waiting event associated with the marker protected: + bool cpu_wait_ = false; //!< If true, then the command was issued for CPU/GPU sync + //! The Events that need to complete before this command is submitted. EventWaitList eventWaitList_; @@ -336,6 +338,9 @@ class Command : public Event { Command* GetBatchHead() const { return batch_head_; } const Event* waitingEvent() const { return waitingEvent_; } + + //! 
Check if this command(should be a marker) requires CPU wait + bool CpuWaitRequested() const { return cpu_wait_; } }; class UserEvent : public Command { @@ -998,22 +1003,14 @@ class ExternalSemaphoreCmd : public Command { class Marker : public Command { - private: - bool cpu_wait_; //!< If true, then the marker was issued for CPU/GPU sync - public: //! Create a new Marker Marker(HostQueue& queue, bool userVisible, const EventWaitList& eventWaitList = nullWaitList, const Event* waitingEvent = nullptr, bool cpu_wait = false) - : Command(queue, userVisible ? CL_COMMAND_MARKER : 0, eventWaitList, 0, waitingEvent) - , cpu_wait_(cpu_wait) {} + : Command(queue, userVisible ? CL_COMMAND_MARKER : 0, eventWaitList, 0, waitingEvent) { cpu_wait_ = cpu_wait; } //! The actual command implementation. virtual void submit(device::VirtualDevice& device) { device.submitMarker(*this); } - - //! Check if this marker requires CPU wait - bool CpuWaitRequested() const { return cpu_wait_; } - }; /*! \brief Maps CL objects created from external ones and syncs the contents (blocking). From ffbf6e0f49ccff38fa264b94f245553e77fe2231 Mon Sep 17 00:00:00 2001 From: Vladislav Sytchenko Date: Fri, 16 Jul 2021 16:11:41 -0400 Subject: [PATCH 070/102] SWDEV-294514 - Limit HIP-GL interop logic to HIP Below logic is causing a crash in the CL-GL interop. As a workaround, limit it only to HIP. Change-Id: I12e81d035ebd80a4a9a09eb6eea2fae7040d90c9 --- platform/memory.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/platform/memory.cpp b/platform/memory.cpp index 20eef2bbf..0ebe458f8 100644 --- a/platform/memory.cpp +++ b/platform/memory.cpp @@ -301,7 +301,7 @@ bool Memory::create(void* initFrom, bool sysMemAlloc, bool skipAlloc, bool force LogPrintfError("Can't allocate memory size - 0x%08X bytes!", getSize()); return false; } - if (isInterop()) { + if (amd::IS_HIP && isInterop()) { // Interop resources dont' have svm allocations, we use device address for mapping. 
amd::MemObjMap::AddMemObj( reinterpret_cast(static_cast(mem->virtualAddress())), this); @@ -415,7 +415,7 @@ Memory::~Memory() { if (NULL != deviceMemories_) { // Destroy all device memory objects for (uint i = 0; i < numDevices_; ++i) { - if (isInterop() && deviceMemories_[i].value_ != nullptr) { + if (amd::IS_HIP && isInterop() && deviceMemories_[i].value_ != nullptr) { amd::MemObjMap::RemoveMemObj(reinterpret_cast( static_cast(deviceMemories_[i].value_->virtualAddress()))); } From 3c0a478ead2d948daea9f1f2f9722e56b412f793 Mon Sep 17 00:00:00 2001 From: Vladislav Sytchenko Date: Mon, 28 Jun 2021 14:21:02 -0400 Subject: [PATCH 071/102] SWDEV-283981 - [PAL] Support hostcall SQ interrupt Change-Id: Ic6f0ad384404712d5eeb1eaf600e231a8f8631f7 --- device/devhostcall.cpp | 6 ++-- device/pal/palsignal.cpp | 70 +++++++++++++++++++++++++++++++++++++++- device/pal/palsignal.hpp | 3 ++ 3 files changed, 75 insertions(+), 4 deletions(-) diff --git a/device/devhostcall.cpp b/device/devhostcall.cpp index a73c4451b..c49ecc012 100644 --- a/device/devhostcall.cpp +++ b/device/devhostcall.cpp @@ -335,10 +335,10 @@ void HostcallListener::removeBuffer(HostcallBuffer* buffer) { bool HostcallListener::initialize(const amd::Device &dev) { doorbell_ = dev.createSignal(); -#ifdef WITH_HSA_DEVICE - auto ws = device::Signal::WaitState::Blocked; -#else +#if defined(WITH_PAL_DEVICE) && !defined(_WIN32) auto ws = device::Signal::WaitState::Active; +#else + auto ws = device::Signal::WaitState::Blocked; #endif if ((doorbell_ == nullptr) || !doorbell_->Init(dev, SIGNAL_INIT, ws)) { return false; diff --git a/device/pal/palsignal.cpp b/device/pal/palsignal.cpp index 4f7ac33cc..076464741 100644 --- a/device/pal/palsignal.cpp +++ b/device/pal/palsignal.cpp @@ -29,6 +29,21 @@ namespace pal { Signal::~Signal() { dev_->context().svmFree(amdSignal_); + + if (ws_ == device::Signal::WaitState::Blocked) { +#if defined(_WIN32) + Pal::Result result = Pal::Result::Success; + + Pal::UnregisterEventInfo eventInfo = 
{}; + eventInfo.pEvent = &event_; + eventInfo.trackingType = Pal::EventTrackingType::ShaderInterrupt; + result = dev_->iDev()->UnregisterEvent(eventInfo); + if (result != Pal::Result::Success) { + ClPrint(amd::LOG_ERROR, amd::LOG_QUEUE, + "Failed to unregister SQ event needed for hostcall buffer"); + } +#endif + } } bool Signal::Init(const amd::Device& dev, uint64_t init, device::Signal::WaitState ws) { @@ -47,6 +62,47 @@ bool Signal::Init(const amd::Device& dev, uint64_t init, device::Signal::WaitSta amdSignal_ = new (buffer) amd_signal_t(); amdSignal_->value = init; + if (ws_ == device::Signal::WaitState::Blocked) { +#if defined(_WIN32) + Pal::Result result = Pal::Result::Success; + + Util::EventCreateFlags flags = {}; + flags.manualReset = false; + flags.initiallySignaled = false; + result = event_.Init(flags); + if (result != Pal::Result::Success) { + ClPrint(amd::LOG_ERROR, amd::LOG_QUEUE, + "Failed to create Pal::Util::Event needed for hostcall buffer"); + return false; + } + + result = event_.Set(); + if (result != Pal::Result::Success) { + ClPrint(amd::LOG_ERROR, amd::LOG_QUEUE, + "Failed to set Pal::Util::Event needed for hostcall buffer"); + return false; + } + + Pal::RegisterEventInfo eventInputInfo = {}; + eventInputInfo.pEvent = &event_; + eventInputInfo.trackingType = Pal::EventTrackingType::ShaderInterrupt; + Pal::RegisterEventOutputInfo eventOutputInfo = {}; + result = dev_->iDev()->RegisterEvent( + eventInputInfo, + &eventOutputInfo); + if (result != Pal::Result::Success) { + ClPrint(amd::LOG_ERROR, amd::LOG_QUEUE, + "Failed to register SQ event needed for hostcall buffer"); + return false; + } + amdSignal_->event_id = eventOutputInfo.shaderInterrupt.eventId; + amdSignal_->event_mailbox_ptr = eventOutputInfo.shaderInterrupt.eventMailboxGpuVa; + ClPrint(amd::LOG_INFO, amd::LOG_INIT, + "Registered SQ event %d with mailbox slot %p", + amdSignal_->event_id, amdSignal_->event_mailbox_ptr); +#endif + } + return true; } @@ -67,7 +123,19 @@ uint64_t 
Signal::Wait(uint64_t value, device::Signal::Condition c, uint64_t time } (c); if (ws_ == device::Signal::WaitState::Blocked) { - guarantee(false, "Unimplemented"); +#if defined(_WIN32) + Pal::Result result = Pal::Result::Success; + + float timeoutInSec = timeout / (1000 * 1000); + result = event_.Wait(timeoutInSec); + + if (result != Pal::Result::Success) { + return -1; + } + + std::atomic_thread_fence(std::memory_order_acquire); + return amdSignal_->value; +#endif } else if (ws_ == device::Signal::WaitState::Active) { auto start = amd::Os::timeNanos(); while (true) { diff --git a/device/pal/palsignal.hpp b/device/pal/palsignal.hpp index d043c6a92..781cbd6b1 100644 --- a/device/pal/palsignal.hpp +++ b/device/pal/palsignal.hpp @@ -24,6 +24,8 @@ #include +#include "palEvent.h" + namespace pal { class Device; @@ -32,6 +34,7 @@ class Signal: public device::Signal { private: const Device* dev_; amd_signal_t* amdSignal_; + Util::Event event_; public: ~Signal() override; From cc17b1fc15d654cbe5bbbdf028b3f70bd1647d63 Mon Sep 17 00:00:00 2001 From: German Andryeyev Date: Mon, 19 Jul 2021 15:05:24 -0400 Subject: [PATCH 072/102] SWDEV-290384 - Enable active wait on CPU if HIP requested Change-Id: Idea5adf7a4705cb999da6785e6229fe3200dce17 --- device/rocm/rocdevice.cpp | 3 ++- device/rocm/rocvirtual.cpp | 2 +- device/rocm/rocvirtual.hpp | 36 ++++++++++++++++++------------------ 3 files changed, 21 insertions(+), 20 deletions(-) diff --git a/device/rocm/rocdevice.cpp b/device/rocm/rocdevice.cpp index 69ab3590d..c72e14c28 100644 --- a/device/rocm/rocdevice.cpp +++ b/device/rocm/rocdevice.cpp @@ -2546,7 +2546,8 @@ bool Device::IsHwEventReady(const amd::Event& event, bool wait) const { ClPrint(amd::LOG_INFO, amd::LOG_SIG, "No HW event"); return false; } else if (wait) { - WaitForSignal(reinterpret_cast(hw_event)->signal_); + auto* vdev = event.command().queue()->vdev(); + WaitForSignal(reinterpret_cast(hw_event)->signal_, vdev->ActiveWait()); return true; } return 
(hsa_signal_load_relaxed(reinterpret_cast(hw_event)->signal_) <= 0); diff --git a/device/rocm/rocvirtual.cpp b/device/rocm/rocvirtual.cpp index 1c0773bf5..9cde1f30a 100644 --- a/device/rocm/rocvirtual.cpp +++ b/device/rocm/rocvirtual.cpp @@ -508,7 +508,7 @@ bool VirtualGPU::HwQueueTracker::CpuWaitForSignal(ProfilingSignal* signal) { } else { ClPrint(amd::LOG_DEBUG, amd::LOG_COPY, "[%zx]!\t Host wait on completion_signal=0x%zx", std::this_thread::get_id(), signal->signal_.handle); - if (!WaitForSignal(signal->signal_)) { + if (!WaitForSignal(signal->signal_, gpu_.ActiveWait())) { LogPrintfError("Failed signal [0x%lx] wait", signal->signal_); return false; } diff --git a/device/rocm/rocvirtual.hpp b/device/rocm/rocvirtual.hpp index f5c8666a9..0d5b41010 100644 --- a/device/rocm/rocvirtual.hpp +++ b/device/rocm/rocvirtual.hpp @@ -45,34 +45,34 @@ constexpr static uint64_t kTimeout100us = 100 * K; constexpr static uint64_t kUnlimitedWait = std::numeric_limits::max(); template -inline bool WaitForSignal(hsa_signal_t signal) { +inline bool WaitForSignal(hsa_signal_t signal, bool active_wait = false) { if (hsa_signal_load_relaxed(signal) > 0) { - if (active_wait_timeout) { - uint64_t timeout = ROC_ACTIVE_WAIT_TIMEOUT * K; + uint64_t timeout = kTimeout100us; + if (active_wait) { + timeout = kUnlimitedWait; + } else if (active_wait_timeout) { + timeout = ROC_ACTIVE_WAIT_TIMEOUT * K; if (timeout == 0) { return false; } - ClPrint(amd::LOG_INFO, amd::LOG_SIG, "Host active wait for Signal = (0x%lx) for %d us", - signal.handle, ROC_ACTIVE_WAIT_TIMEOUT); + } - if (hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, kInitSignalValueOne, - timeout, HSA_WAIT_STATE_ACTIVE) != 0) { + ClPrint(amd::LOG_INFO, amd::LOG_SIG, "Host active wait for Signal = (0x%lx) for %d ns", + signal.handle, timeout); + + // Active wait with a timeout + if (hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, kInitSignalValueOne, + timeout, HSA_WAIT_STATE_ACTIVE) != 0) { + if 
(active_wait_timeout) { return false; } - } else { - - uint64_t timeout = kTimeout100us; - ClPrint(amd::LOG_INFO, amd::LOG_SIG, "Host wait until Signal = (0x%lx) decremented", + ClPrint(amd::LOG_INFO, amd::LOG_SIG, "Host blocked wait for Signal = (0x%lx)", signal.handle); - // Active wait with a timeout + // Wait until the completion with CPU suspend if (hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, kInitSignalValueOne, - timeout, HSA_WAIT_STATE_ACTIVE) != 0) { - // Wait until the completion with CPU suspend - if (hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, kInitSignalValueOne, - kUnlimitedWait, HSA_WAIT_STATE_BLOCKED) != 0) { - return false; - } + kUnlimitedWait, HSA_WAIT_STATE_BLOCKED) != 0) { + return false; } } } From 95b4993b9a68d86ed2d14c257714c78a443e0da6 Mon Sep 17 00:00:00 2001 From: jujiang Date: Tue, 13 Jul 2021 19:44:02 -0400 Subject: [PATCH 073/102] SWDEV-291787 - Fix persistent direct map Change-Id: Ic1507cc6d63e9ed574e8e169bce7bf56f4792c19 --- device/device.hpp | 3 ++- device/pal/palmemory.cpp | 14 ++++++++++---- device/pal/palmemory.hpp | 10 ++++++++++ device/pal/palresource.cpp | 6 +++--- device/pal/palresource.hpp | 3 ++- device/pal/palvirtual.cpp | 7 +++++-- 6 files changed, 32 insertions(+), 11 deletions(-) diff --git a/device/device.hpp b/device/device.hpp index 120e03ea7..115176f5e 100644 --- a/device/device.hpp +++ b/device/device.hpp @@ -841,7 +841,8 @@ class Memory : public amd::HeapObject { SubMemoryObject = 0x00000008, //!< Memory is sub-memory HostMemoryRegistered = 0x00000010, //!< Host memory was registered MemoryCpuUncached = 0x00000020, //!< Memory is uncached on CPU access(slow read) - AllowedPeerAccess = 0x00000040 //!< Memory can be accessed from peer + AllowedPeerAccess = 0x00000040, //!< Memory can be accessed from peer + PersistentMap = 0x00000080 //!< Map Peristent memory }; uint flags_; //!< Memory object flags diff --git a/device/pal/palmemory.cpp b/device/pal/palmemory.cpp index 
8a5940d1a..29a4fe201 100644 --- a/device/pal/palmemory.cpp +++ b/device/pal/palmemory.cpp @@ -786,12 +786,16 @@ void* Memory::allocMapTarget(const amd::Coord3D& origin, const amd::Coord3D& reg mapAddress = reinterpret_cast
(owner()->getHostMem()); } // If resource is a persistent allocation, we can use it directly - else if (isPersistentDirectMap()) { + else if ((isPersistentDirectMap(mapFlags & CL_MAP_WRITE) && (getMapCount() == 0)) || + isPersistentMapped()) { if (nullptr == map(nullptr)) { LogError("Could not map target persistent resource"); decIndMapCount(); return nullptr; } + if (getMapCount() == 1) { + setPersistentMapFlag(true); + } mapAddress = data(); } // Otherwise we can use a remote resource: @@ -1046,14 +1050,17 @@ void* Image::allocMapTarget(const amd::Coord3D& origin, const amd::Coord3D& regi //! runtime can't use it directly, //! because CAL volume map doesn't work properly. //! @todo arrays can be added for persistent lock with some CAL changes - else if (isPersistentDirectMap()) { + else if((isPersistentDirectMap(mapFlags & CL_MAP_WRITE) && (getMapCount() == 0)) || + isPersistentMapped()) { if (nullptr == map(nullptr)) { useRemoteResource = true; LogError("Could not map target persistent resource, try remote resource"); } else { useRemoteResource = false; mapAddress = data(); - + if (getMapCount() == 1) { + setPersistentMapFlag(true); + } // Calculate the offset in bytes offset *= elementSize(); @@ -1075,7 +1082,6 @@ void* Image::allocMapTarget(const amd::Coord3D& origin, const amd::Coord3D& regi const static bool SysMem = true; bool failed = false; amd::Memory* memory; - // Search for a possible indirect resource memory = dev().findMapTarget(owner()->getSize()); diff --git a/device/pal/palmemory.hpp b/device/pal/palmemory.hpp index bc49c0f65..0c1cbd46a 100644 --- a/device/pal/palmemory.hpp +++ b/device/pal/palmemory.hpp @@ -97,6 +97,16 @@ class Memory : public device::Memory, public Resource { size_t* slicePitch = NULL //!< Slice for the mapped memory ); + virtual bool isPersistentMapped() const { return (flags_ & PersistentMap) ? 
true : false; } + virtual void setPersistentMapFlag(bool persistentMapped) { + if (persistentMapped == true) { + flags_ |= PersistentMap; + } + else { + flags_ &= ~PersistentMap; + } + } + //! Pins system memory associated with this memory object virtual bool pinSystemMemory(void* hostPtr, //!< System memory address size_t size //!< Size of allocated system memory diff --git a/device/pal/palresource.cpp b/device/pal/palresource.cpp index c084c0d15..a3de8b86e 100644 --- a/device/pal/palresource.cpp +++ b/device/pal/palresource.cpp @@ -1824,9 +1824,9 @@ bool Resource::isMemoryType(MemoryType memType) const { } // ================================================================================================ -bool Resource::isPersistentDirectMap() const { - bool directMap = - ((memoryType() == Resource::Persistent) && (desc().dimSize_ < 3) && !desc().imageArray_); +bool Resource::isPersistentDirectMap(bool writeMap) const { + bool directMap = ((memoryType() == Resource::Persistent) && + (desc().dimSize_ < 3) && !desc().imageArray_ && writeMap); // If direct map is possible, then validate it with the current tiling if (directMap && desc().tiled_) { diff --git a/device/pal/palresource.hpp b/device/pal/palresource.hpp index e6da7725d..c7108368b 100644 --- a/device/pal/palresource.hpp +++ b/device/pal/palresource.hpp @@ -296,7 +296,8 @@ class Resource : public amd::HeapObject { bool mipMapped() const { return (desc().mipLevels_ > 1) ? true : false; } //! Checks if persistent memory can have a direct map - bool isPersistentDirectMap() const; + bool isPersistentDirectMap(bool writeMap = true) const; + int getMapCount() const { return mapCount_; } /*! 
\brief Locks the resource and returns a physical pointer * diff --git a/device/pal/palvirtual.cpp b/device/pal/palvirtual.cpp index ea6d87a51..47ea8b4f2 100644 --- a/device/pal/palvirtual.cpp +++ b/device/pal/palvirtual.cpp @@ -1620,7 +1620,7 @@ void VirtualGPU::submitMapMemory(amd::MapMemoryCommand& vcmd) { // Add memory to VA cache, so rutnime can detect direct access to VA dev().addVACache(memory); - } else if (memory->isPersistentDirectMap()) { + } else if (memory->isPersistentMapped()) { // Nothing to do here } else if (memory->mapMemory() != nullptr) { // Target is a remote resource, so copy @@ -1721,10 +1721,13 @@ void VirtualGPU::submitUnmapMemory(amd::UnmapMemoryCommand& vcmd) { } // data check was added for persistent memory that failed to get aperture // and therefore are treated like a remote resource - else if (memory->isPersistentDirectMap() && (memory->data() != nullptr)) { + else if (memory->isPersistentMapped()) { // Map/unmap must be serialized amd::ScopedLock lock(owner->lockMemoryOps()); memory->unmap(this); + if (memory->getMapount() == 0) { + memory->setPersistentMapFlag(false); + } } else if (memory->mapMemory() != nullptr) { if (writeMapInfo->isUnmapWrite()) { amd::Coord3D srcOrigin(0, 0, 0); From 5b485e0f1d55af3357385a6fa0b5d475a10986d4 Mon Sep 17 00:00:00 2001 From: Vladislav Sytchenko Date: Mon, 19 Jul 2021 18:58:45 -0400 Subject: [PATCH 074/102] SWDEV-291787 - Fix Windows build Change-Id: I1b97a1100e4c498f53aaad7157f21d0c5f8a130d --- device/pal/palvirtual.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/device/pal/palvirtual.cpp b/device/pal/palvirtual.cpp index 47ea8b4f2..898367b71 100644 --- a/device/pal/palvirtual.cpp +++ b/device/pal/palvirtual.cpp @@ -1725,7 +1725,7 @@ void VirtualGPU::submitUnmapMemory(amd::UnmapMemoryCommand& vcmd) { // Map/unmap must be serialized amd::ScopedLock lock(owner->lockMemoryOps()); memory->unmap(this); - if (memory->getMapount() == 0) { + if (memory->getMapCount() == 0) { 
memory->setPersistentMapFlag(false); } } else if (memory->mapMemory() != nullptr) { From ff997c71d4bc4dae5b602873807660d530b2b811 Mon Sep 17 00:00:00 2001 From: Chauncey Hui Date: Tue, 20 Jul 2021 03:00:06 -0400 Subject: [PATCH 075/102] SWDEV-2 - Change OpenCL version number from 3339 to 3340 --- utils/versions.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/versions.hpp b/utils/versions.hpp index 43e343c09..a14bc7cf8 100644 --- a/utils/versions.hpp +++ b/utils/versions.hpp @@ -28,7 +28,7 @@ #endif // AMD_PLATFORM_NAME #ifndef AMD_PLATFORM_BUILD_NUMBER -#define AMD_PLATFORM_BUILD_NUMBER 3339 +#define AMD_PLATFORM_BUILD_NUMBER 3340 #endif // AMD_PLATFORM_BUILD_NUMBER #ifndef AMD_PLATFORM_REVISION_NUMBER From ac1f5a8be5e556f7e7641796ed8396b0820ea2e5 Mon Sep 17 00:00:00 2001 From: Vladislav Sytchenko Date: Mon, 19 Jul 2021 22:42:38 -0400 Subject: [PATCH 076/102] SWDEV-283981 - Revert "SWDEV-283981 - [PAL] Support hostcall SQ interrupt" This reverts commit ce3bf0d998457f919aeb4a0e3bf3b58a5d5d6948. Reason for revert: need SWDEV-294782 to be resolved before we can enable SQ interrupt support. 
Change-Id: I328170b60f1a3aab28c0b1fd3191297a1a51ecb7 --- device/devhostcall.cpp | 6 ++-- device/pal/palsignal.cpp | 70 +--------------------------------------- device/pal/palsignal.hpp | 3 -- 3 files changed, 4 insertions(+), 75 deletions(-) diff --git a/device/devhostcall.cpp b/device/devhostcall.cpp index c49ecc012..a73c4451b 100644 --- a/device/devhostcall.cpp +++ b/device/devhostcall.cpp @@ -335,10 +335,10 @@ void HostcallListener::removeBuffer(HostcallBuffer* buffer) { bool HostcallListener::initialize(const amd::Device &dev) { doorbell_ = dev.createSignal(); -#if defined(WITH_PAL_DEVICE) && !defined(_WIN32) - auto ws = device::Signal::WaitState::Active; -#else +#ifdef WITH_HSA_DEVICE auto ws = device::Signal::WaitState::Blocked; +#else + auto ws = device::Signal::WaitState::Active; #endif if ((doorbell_ == nullptr) || !doorbell_->Init(dev, SIGNAL_INIT, ws)) { return false; diff --git a/device/pal/palsignal.cpp b/device/pal/palsignal.cpp index 076464741..4f7ac33cc 100644 --- a/device/pal/palsignal.cpp +++ b/device/pal/palsignal.cpp @@ -29,21 +29,6 @@ namespace pal { Signal::~Signal() { dev_->context().svmFree(amdSignal_); - - if (ws_ == device::Signal::WaitState::Blocked) { -#if defined(_WIN32) - Pal::Result result = Pal::Result::Success; - - Pal::UnregisterEventInfo eventInfo = {}; - eventInfo.pEvent = &event_; - eventInfo.trackingType = Pal::EventTrackingType::ShaderInterrupt; - result = dev_->iDev()->UnregisterEvent(eventInfo); - if (result != Pal::Result::Success) { - ClPrint(amd::LOG_ERROR, amd::LOG_QUEUE, - "Failed to unregister SQ event needed for hostcall buffer"); - } -#endif - } } bool Signal::Init(const amd::Device& dev, uint64_t init, device::Signal::WaitState ws) { @@ -62,47 +47,6 @@ bool Signal::Init(const amd::Device& dev, uint64_t init, device::Signal::WaitSta amdSignal_ = new (buffer) amd_signal_t(); amdSignal_->value = init; - if (ws_ == device::Signal::WaitState::Blocked) { -#if defined(_WIN32) - Pal::Result result = Pal::Result::Success; - 
- Util::EventCreateFlags flags = {}; - flags.manualReset = false; - flags.initiallySignaled = false; - result = event_.Init(flags); - if (result != Pal::Result::Success) { - ClPrint(amd::LOG_ERROR, amd::LOG_QUEUE, - "Failed to create Pal::Util::Event needed for hostcall buffer"); - return false; - } - - result = event_.Set(); - if (result != Pal::Result::Success) { - ClPrint(amd::LOG_ERROR, amd::LOG_QUEUE, - "Failed to set Pal::Util::Event needed for hostcall buffer"); - return false; - } - - Pal::RegisterEventInfo eventInputInfo = {}; - eventInputInfo.pEvent = &event_; - eventInputInfo.trackingType = Pal::EventTrackingType::ShaderInterrupt; - Pal::RegisterEventOutputInfo eventOutputInfo = {}; - result = dev_->iDev()->RegisterEvent( - eventInputInfo, - &eventOutputInfo); - if (result != Pal::Result::Success) { - ClPrint(amd::LOG_ERROR, amd::LOG_QUEUE, - "Failed to register SQ event needed for hostcall buffer"); - return false; - } - amdSignal_->event_id = eventOutputInfo.shaderInterrupt.eventId; - amdSignal_->event_mailbox_ptr = eventOutputInfo.shaderInterrupt.eventMailboxGpuVa; - ClPrint(amd::LOG_INFO, amd::LOG_INIT, - "Registered SQ event %d with mailbox slot %p", - amdSignal_->event_id, amdSignal_->event_mailbox_ptr); -#endif - } - return true; } @@ -123,19 +67,7 @@ uint64_t Signal::Wait(uint64_t value, device::Signal::Condition c, uint64_t time } (c); if (ws_ == device::Signal::WaitState::Blocked) { -#if defined(_WIN32) - Pal::Result result = Pal::Result::Success; - - float timeoutInSec = timeout / (1000 * 1000); - result = event_.Wait(timeoutInSec); - - if (result != Pal::Result::Success) { - return -1; - } - - std::atomic_thread_fence(std::memory_order_acquire); - return amdSignal_->value; -#endif + guarantee(false, "Unimplemented"); } else if (ws_ == device::Signal::WaitState::Active) { auto start = amd::Os::timeNanos(); while (true) { diff --git a/device/pal/palsignal.hpp b/device/pal/palsignal.hpp index 781cbd6b1..d043c6a92 100644 --- 
a/device/pal/palsignal.hpp +++ b/device/pal/palsignal.hpp @@ -24,8 +24,6 @@ #include -#include "palEvent.h" - namespace pal { class Device; @@ -34,7 +32,6 @@ class Signal: public device::Signal { private: const Device* dev_; amd_signal_t* amdSignal_; - Util::Event event_; public: ~Signal() override; From 871e8f808c2f556793a7ac8033bbd40518684b65 Mon Sep 17 00:00:00 2001 From: Chauncey Hui Date: Wed, 21 Jul 2021 03:00:07 -0400 Subject: [PATCH 077/102] SWDEV-2 - Change OpenCL version number from 3340 to 3341 --- utils/versions.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/versions.hpp b/utils/versions.hpp index a14bc7cf8..7bdc8d8c0 100644 --- a/utils/versions.hpp +++ b/utils/versions.hpp @@ -28,7 +28,7 @@ #endif // AMD_PLATFORM_NAME #ifndef AMD_PLATFORM_BUILD_NUMBER -#define AMD_PLATFORM_BUILD_NUMBER 3340 +#define AMD_PLATFORM_BUILD_NUMBER 3341 #endif // AMD_PLATFORM_BUILD_NUMBER #ifndef AMD_PLATFORM_REVISION_NUMBER From dd3fed2e922ab82551e9fb28e6e3f5e0ec838a03 Mon Sep 17 00:00:00 2001 From: Sourabh Betigeri Date: Thu, 15 Jul 2021 17:19:19 -0700 Subject: [PATCH 078/102] SWDEV-292523 - [vdi]Change in the signature of streamOperations APIs, particularly 'value' arg to unsigned 'value Change-Id: I74b24b2dec911acd5e7a364ea8c050c2ecb1c3b8 --- platform/command.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/platform/command.hpp b/platform/command.hpp index 91a412f4e..3dd726ea5 100644 --- a/platform/command.hpp +++ b/platform/command.hpp @@ -646,7 +646,7 @@ class FillMemoryCommand : public OneMemoryArgCommand { class StreamOperationCommand : public OneMemoryArgCommand { private: - int64_t value_; // !< Value to Wait on or to Write. + uint64_t value_; // !< Value to Wait on or to Write. uint64_t mask_; // !< Mask to be applied on signal value for Wait operation. unsigned int flags_; // !< Flags defining the Wait condition. 
size_t offset_; // !< Offset into memory for Write @@ -657,7 +657,7 @@ class StreamOperationCommand : public OneMemoryArgCommand { public: StreamOperationCommand(HostQueue& queue, cl_command_type cmdType, - const EventWaitList& eventWaitList, Memory& memory, const int64_t value, + const EventWaitList& eventWaitList, Memory& memory, const uint64_t value, const uint64_t mask, unsigned int flags, size_t offset, size_t sizeBytes) : OneMemoryArgCommand(queue, cmdType, eventWaitList, memory), value_(value), @@ -675,7 +675,7 @@ class StreamOperationCommand : public OneMemoryArgCommand { virtual void submit(device::VirtualDevice& device) { device.submitStreamOperation(*this); } //! Returns the value - const int64_t value() const { return value_; } + const uint64_t value() const { return value_; } //! Returns the wait mask const uint64_t mask() const { return mask_; } //! Return the wait flags From 1c46ab4c983cb73f57ce99688217d984b12997a0 Mon Sep 17 00:00:00 2001 From: Satyanvesh Dittakavi Date: Tue, 20 Jul 2021 15:14:32 +0000 Subject: [PATCH 079/102] SWDEV-292021 - Fix Device Reset - Device Reset should not purge the allocations that were not by the user - Addresses QMCPack Test abort due to the removal of all the mem objects during reset Change-Id: I7b7a123e72bcc985d7e51d17c2382bc618d3e041 --- device/device.cpp | 8 +++++--- device/device.hpp | 2 +- device/pal/palprogram.cpp | 1 + device/rocm/rocprogram.cpp | 5 +++-- platform/memory.hpp | 3 ++- 5 files changed, 12 insertions(+), 7 deletions(-) diff --git a/device/device.cpp b/device/device.cpp index 2b6b81995..40e124e51 100644 --- a/device/device.cpp +++ b/device/device.cpp @@ -333,9 +333,11 @@ void MemObjMap::Purge(amd::Device* dev) { assert(dev != nullptr); amd::ScopedLock lock(AllocatedLock_); - for (auto it = MemObjMap_.cbegin() ; it != MemObjMap_.cend() ;) { - const std::vector& devices = it->second->getContext().devices(); - if (devices.size() == 1 && devices[0] == dev) { + for (auto it = MemObjMap_.cbegin(); it != 
MemObjMap_.cend(); ) { + amd::Memory* memObj = it->second; + unsigned int flags = memObj->getMemFlags(); + const std::vector& devices = memObj->getContext().devices(); + if (devices.size() == 1 && devices[0] == dev && !(flags & ROCCLR_MEM_INTERNAL_MEMORY)) { it = MemObjMap_.erase(it); } else { ++it; diff --git a/device/device.hpp b/device/device.hpp index 115176f5e..845b65df2 100644 --- a/device/device.hpp +++ b/device/device.hpp @@ -1246,7 +1246,7 @@ class MemObjMap : public AllStatic { static amd::Memory* FindMemObj( const void* k); //!< find the mem object based on the input pointer static void UpdateAccess(amd::Device *peerDev); - static void Purge(amd::Device*dev); //!< Purge all the memories on the given device + static void Purge(amd::Device* dev); //!< Purge all user allocated memories on the given device private: static std::map MemObjMap_; //!< the mem object<->hostptr information container diff --git a/device/pal/palprogram.cpp b/device/pal/palprogram.cpp index 3cc213466..f6e319b9b 100644 --- a/device/pal/palprogram.cpp +++ b/device/pal/palprogram.cpp @@ -508,6 +508,7 @@ bool HSAILProgram::createGlobalVarObj(amd::Memory** amd_mem_obj, void** device_p } /* Create a View from the global pal::Memory */ + flags = ROCCLR_MEM_INTERNAL_MEMORY; parent = codeSegGpu_->owner(); *amd_mem_obj = new (parent->getContext()) amd::Buffer(*parent, flags, offset, *bytes); diff --git a/device/rocm/rocprogram.cpp b/device/rocm/rocprogram.cpp index 1959f33a9..32aed3d55 100644 --- a/device/rocm/rocprogram.cpp +++ b/device/rocm/rocprogram.cpp @@ -190,8 +190,9 @@ bool Program::createGlobalVarObj(amd::Memory** amd_mem_obj, void** device_pptr, } roc_device = &(rocDevice()); - *amd_mem_obj = new(roc_device->context()) amd::Buffer(roc_device->context(), 0, *bytes, - *device_pptr); + *amd_mem_obj = new(roc_device->context()) amd::Buffer(roc_device->context(), + ROCCLR_MEM_INTERNAL_MEMORY, + *bytes, *device_pptr); if (*amd_mem_obj == nullptr) { buildLog_ += "[OCL] Failed to create a 
mem object!"; diff --git a/platform/memory.hpp b/platform/memory.hpp index 1b842dee3..0906921e6 100644 --- a/platform/memory.hpp +++ b/platform/memory.hpp @@ -38,7 +38,8 @@ #include #include #define CL_MEM_FOLLOW_USER_NUMA_POLICY (1u << 31) -#define ROCCLR_MEM_HSA_SIGNAL_MEMORY (1u << 30) +#define ROCCLR_MEM_HSA_SIGNAL_MEMORY (1u << 30) +#define ROCCLR_MEM_INTERNAL_MEMORY (1u << 29) namespace device { class Memory; From 5911c06086876eba2500169fd9639b72785a9b58 Mon Sep 17 00:00:00 2001 From: Chauncey Hui Date: Thu, 22 Jul 2021 03:00:06 -0400 Subject: [PATCH 080/102] SWDEV-2 - Change OpenCL version number from 3341 to 3342 --- utils/versions.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/versions.hpp b/utils/versions.hpp index 7bdc8d8c0..c3b451b19 100644 --- a/utils/versions.hpp +++ b/utils/versions.hpp @@ -28,7 +28,7 @@ #endif // AMD_PLATFORM_NAME #ifndef AMD_PLATFORM_BUILD_NUMBER -#define AMD_PLATFORM_BUILD_NUMBER 3341 +#define AMD_PLATFORM_BUILD_NUMBER 3342 #endif // AMD_PLATFORM_BUILD_NUMBER #ifndef AMD_PLATFORM_REVISION_NUMBER From 37d871167c8b101210696c7437d91e0456b91355 Mon Sep 17 00:00:00 2001 From: Aaron Liu Date: Wed, 7 Jul 2021 10:32:52 +0800 Subject: [PATCH 081/102] SWDEV-294027 - [Lnx][YC] Add Yellow Carp support Only add Roc path and don't use Pal path. 
Signed-off-by: Aaron Liu Change-Id: I7117e2dc3c3ad4c8d563e9bbdc721f70ddba51fd --- device/device.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/device/device.cpp b/device/device.cpp index 40e124e51..ba90a7f4c 100644 --- a/device/device.cpp +++ b/device/device.cpp @@ -187,6 +187,7 @@ std::pair Isa::supportedIsas() { {"gfx1032", "gfx1032", true, true, false, 10, 3, 2, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32}, {"gfx1033", "gfx1033", true, false, false, 10, 3, 3, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32}, {"gfx1034", "gfx1034", true, true, false, 10, 3, 4, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32}, + {"gfx1035", "gfx1035", true, false, false, 10, 3, 5, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32}, }; return std::make_pair(std::begin(supportedIsas_), std::end(supportedIsas_)); } From 36b5908eefa840757d9a0de3351d5228623f1414 Mon Sep 17 00:00:00 2001 From: Chauncey Hui Date: Fri, 23 Jul 2021 03:00:07 -0400 Subject: [PATCH 082/102] SWDEV-2 - Change OpenCL version number from 3342 to 3343 --- utils/versions.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/versions.hpp b/utils/versions.hpp index c3b451b19..8ea5f6817 100644 --- a/utils/versions.hpp +++ b/utils/versions.hpp @@ -28,7 +28,7 @@ #endif // AMD_PLATFORM_NAME #ifndef AMD_PLATFORM_BUILD_NUMBER -#define AMD_PLATFORM_BUILD_NUMBER 3342 +#define AMD_PLATFORM_BUILD_NUMBER 3343 #endif // AMD_PLATFORM_BUILD_NUMBER #ifndef AMD_PLATFORM_REVISION_NUMBER From b11e88c55a1657716d2e19e9defd7692b4f99374 Mon Sep 17 00:00:00 2001 From: anusha GodavarthySurya Date: Mon, 26 Jul 2021 14:26:32 -0700 Subject: [PATCH 083/102] SWDEV-295251 - Remove waitEvent check in append Change-Id: I994f3e7c67ed29c4ee46229c8bcd1448fc7f59ec --- platform/commandqueue.cpp | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/platform/commandqueue.cpp b/platform/commandqueue.cpp index 4de1a0aca..2cc44abb3 100644 --- a/platform/commandqueue.cpp +++ b/platform/commandqueue.cpp @@ -221,23 
+221,21 @@ void HostQueue::append(Command& command) { return; } - if (command.waitingEvent() == nullptr) { - // Set last submitted command - Command* prevLastEnqueueCommand; - command.retain(); - { - // lastCmdLock_ ensures that lastEnqueueCommand() can retain the command before it is swapped - // out. We want to keep this critical section as short as possible, so the command should be - // released outside this section. - ScopedLock l(lastCmdLock_); - - prevLastEnqueueCommand = lastEnqueueCommand_; - lastEnqueueCommand_ = &command; - } + // Set last submitted command + Command* prevLastEnqueueCommand; + command.retain(); + { + // lastCmdLock_ ensures that lastEnqueueCommand() can retain the command before it is swapped + // out. We want to keep this critical section as short as possible, so the command should be + // released outside this section. + ScopedLock l(lastCmdLock_); - if (prevLastEnqueueCommand != nullptr) { - prevLastEnqueueCommand->release(); - } + prevLastEnqueueCommand = lastEnqueueCommand_; + lastEnqueueCommand_ = &command; + } + + if (prevLastEnqueueCommand != nullptr) { + prevLastEnqueueCommand->release(); } } From bedef4cce5764fb8fa8601869b8d00274d959173 Mon Sep 17 00:00:00 2001 From: anusha GodavarthySurya Date: Thu, 22 Jul 2021 08:03:31 -0700 Subject: [PATCH 084/102] SWDEV-240806 - Add methods to update kernel command parameters Change-Id: Iba90a31f9c5d6d4f2b60b7ccf903325c03d4d245 --- platform/command.hpp | 9 +++++++++ platform/ndrange.hpp | 10 ++++++++++ 2 files changed, 19 insertions(+) diff --git a/platform/command.hpp b/platform/command.hpp index 3dd726ea5..31083b6c7 100644 --- a/platform/command.hpp +++ b/platform/command.hpp @@ -915,9 +915,18 @@ class NDRangeKernelCommand : public Command { //! Return the kernel NDRange. const NDRangeContainer& sizes() const { return sizes_; } + //! updates kernel NDRange. 
+ void setSizes(const size_t* globalWorkOffset, const size_t* globalWorkSize, + const size_t* localWorkSize) { + sizes_.update(3, globalWorkOffset, globalWorkSize, localWorkSize); + } + //! Return the shared memory size uint32_t sharedMemBytes() const { return sharedMemBytes_; } + //! updates shared memory size + uint32_t setSharedMemBytes(uint32_t sharedMemBytes) { sharedMemBytes_ = sharedMemBytes; } + //! Return the cooperative groups mode bool cooperativeGroups() const { return (extraParam_ & CooperativeGroups) ? true : false; } diff --git a/platform/ndrange.hpp b/platform/ndrange.hpp index 14a52ef35..cb6f926ce 100644 --- a/platform/ndrange.hpp +++ b/platform/ndrange.hpp @@ -136,6 +136,16 @@ class NDRangeContainer : public HeapObject { } } + //! updates nd-range container + void update(size_t dimensions, const size_t* globalWorkOffset, const size_t* globalWorkSize, + const size_t* localWorkSize) { + for (size_t i = 0; i < dimensions; ++i) { + offset_[i] = globalWorkOffset != NULL ? globalWorkOffset[i] : 0; + global_[i] = globalWorkSize[i]; + local_[i] = localWorkSize[i]; + } + } + //! Return the number of dimensions. 
size_t dimensions() const { return dimensions_; } From cfe013df942daf429696125dda402c82698374ea Mon Sep 17 00:00:00 2001 From: Chauncey Hui Date: Tue, 27 Jul 2021 03:00:06 -0400 Subject: [PATCH 085/102] SWDEV-2 - Change OpenCL version number from 3343 to 3344 --- utils/versions.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/versions.hpp b/utils/versions.hpp index 8ea5f6817..b06f801d8 100644 --- a/utils/versions.hpp +++ b/utils/versions.hpp @@ -28,7 +28,7 @@ #endif // AMD_PLATFORM_NAME #ifndef AMD_PLATFORM_BUILD_NUMBER -#define AMD_PLATFORM_BUILD_NUMBER 3343 +#define AMD_PLATFORM_BUILD_NUMBER 3344 #endif // AMD_PLATFORM_BUILD_NUMBER #ifndef AMD_PLATFORM_REVISION_NUMBER From f96fa05d893e16b6f4e4720abefa9a80dc0dcb9a Mon Sep 17 00:00:00 2001 From: Vladislav Sytchenko Date: Tue, 27 Jul 2021 17:44:21 -0400 Subject: [PATCH 086/102] SWDEV-293519 - [PAL] Limit mgpu SVM logic only to mgpu cases Below logic allocates the host buffer whenever a subbuffer is created from a SVM allocation. This is only needed for multi-device contexts. HIP does not support multi-device contexts, hence this logic just ends up performing unnecessary system allocations. 
Change-Id: I8eae635f7c5289c52ef73434218c1658b788a456 --- device/pal/paldevice.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/device/pal/paldevice.cpp b/device/pal/paldevice.cpp index b37b674d1..33906557d 100644 --- a/device/pal/paldevice.cpp +++ b/device/pal/paldevice.cpp @@ -1396,7 +1396,8 @@ pal::Memory* Device::createBuffer(amd::Memory& owner, bool directAccess) const { return nullptr; } - if (nullptr != owner.parent()->getSvmPtr()) { + if ((nullptr != owner.parent()->getSvmPtr()) && + (owner.parent()->getContext().devices().size() > 1)) { amd::Memory* amdParent = owner.parent(); { // Lock memory object, so only one commitment will occur From 6cb3c1cff235a874b256edda06d894edbf187ed7 Mon Sep 17 00:00:00 2001 From: Vladislav Sytchenko Date: Wed, 28 Jul 2021 11:33:00 -0400 Subject: [PATCH 087/102] SWDEV-240806 - Fix Windows build Fixes error "All control paths should return a value". Change-Id: I4718688b55b24862465e15ea0d64b32fa44b3299 --- platform/command.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/platform/command.hpp b/platform/command.hpp index 31083b6c7..47bc74ac9 100644 --- a/platform/command.hpp +++ b/platform/command.hpp @@ -925,7 +925,7 @@ class NDRangeKernelCommand : public Command { uint32_t sharedMemBytes() const { return sharedMemBytes_; } //! updates shared memory size - uint32_t setSharedMemBytes(uint32_t sharedMemBytes) { sharedMemBytes_ = sharedMemBytes; } + void setSharedMemBytes(uint32_t sharedMemBytes) { sharedMemBytes_ = sharedMemBytes; } //! Return the cooperative groups mode bool cooperativeGroups() const { return (extraParam_ & CooperativeGroups) ? 
true : false; } From 661529951ee63a99ddd9baf02a0dd6fe6ed23834 Mon Sep 17 00:00:00 2001 From: anusha GodavarthySurya Date: Tue, 27 Jul 2021 00:31:28 -0700 Subject: [PATCH 088/102] SWDEV-295251 - Avoid marker if queue is empty for DD to fix MT issue Change-Id: I80be39ace9d93347f81ef8acd7858d43bc4a3f1e --- platform/commandqueue.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/platform/commandqueue.cpp b/platform/commandqueue.cpp index 2cc44abb3..0e904aab8 100644 --- a/platform/commandqueue.cpp +++ b/platform/commandqueue.cpp @@ -108,7 +108,7 @@ void HostQueue::finish() { if (IS_HIP) { command = getLastQueuedCommand(true); // Check if the queue has nothing to process and return - if (command == nullptr) { + if (AMD_DIRECT_DISPATCH && command == nullptr) { return; } } From 691e495ae149c83b94252f3b4929f21a7eb33ef2 Mon Sep 17 00:00:00 2001 From: Chauncey Hui Date: Thu, 29 Jul 2021 03:00:04 -0400 Subject: [PATCH 089/102] SWDEV-2 - Change OpenCL version number from 3344 to 3345 --- utils/versions.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/versions.hpp b/utils/versions.hpp index b06f801d8..7d8bb57b1 100644 --- a/utils/versions.hpp +++ b/utils/versions.hpp @@ -28,7 +28,7 @@ #endif // AMD_PLATFORM_NAME #ifndef AMD_PLATFORM_BUILD_NUMBER -#define AMD_PLATFORM_BUILD_NUMBER 3344 +#define AMD_PLATFORM_BUILD_NUMBER 3345 #endif // AMD_PLATFORM_BUILD_NUMBER #ifndef AMD_PLATFORM_REVISION_NUMBER From 69a02ff5e854ee818a1443396cfc2de91fb3f9d0 Mon Sep 17 00:00:00 2001 From: kjayapra-amd Date: Mon, 26 Jul 2021 17:34:31 -0400 Subject: [PATCH 090/102] SWDEV-295144 - Change uint64_t to size_t to fix failure on 32-bit opencl. 
Change-Id: I5c28e9c606dec1c956f3f48071d8a0271adfff22 --- platform/memory.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/platform/memory.hpp b/platform/memory.hpp index 0906921e6..1135fad02 100644 --- a/platform/memory.hpp +++ b/platform/memory.hpp @@ -651,7 +651,7 @@ class LiquidFlashFile : public RuntimeObject { class ArenaMemory: public Buffer { public: ArenaMemory(Context& context) - : Buffer(context, 0, std::numeric_limits::max(), + : Buffer(context, 0, std::numeric_limits::max(), reinterpret_cast(kArenaMemoryPtr)) {} }; From 882ed4947f58a3fc95cea8b3e7a5c2c09f0f8eeb Mon Sep 17 00:00:00 2001 From: Alex Xie Date: Mon, 31 May 2021 14:11:03 -0400 Subject: [PATCH 091/102] SWDEV-288853 - [OpenCL]: ASIC 1013 Bringup This a cherry pick from the ASIC's branch. Change-Id: Ic6e888f8fa96103d1e79432dd75e68faabd8cf6c --- device/device.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/device/device.cpp b/device/device.cpp index ba90a7f4c..150a7b540 100644 --- a/device/device.cpp +++ b/device/device.cpp @@ -182,6 +182,9 @@ std::pair Isa::supportedIsas() { {"gfx1012", "gfx1012", true, true, false, 10, 1, 2, NONE, ANY, 2, 32, 1, 256, 64 * Ki, 32}, {"gfx1012:xnack-", "gfx1012", true, true, false, 10, 1, 2, NONE, OFF, 2, 32, 1, 256, 64 * Ki, 32}, {"gfx1012:xnack+", nullptr, true, true, false, 10, 1, 2, NONE, ON, 2, 32, 1, 256, 64 * Ki, 32}, + {"gfx1013", "gfx1013", true, false, false, 10, 1, 3, NONE, ANY, 2, 32, 1, 256, 64 * Ki, 32}, + {"gfx1013:xnack-", "gfx1013", true, false, false, 10, 1, 3, NONE, OFF, 2, 32, 1, 256, 64 * Ki, 32}, + {"gfx1013:xnack+", nullptr, true, false, false, 10, 1, 3, NONE, ON, 2, 32, 1, 256, 64 * Ki, 32}, {"gfx1030", "gfx1030", true, true, false, 10, 3, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32}, {"gfx1031", "gfx1031", true, true, false, 10, 3, 1, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32}, {"gfx1032", "gfx1032", true, true, false, 10, 3, 2, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32}, From 375e4c67713df39fcd22914a55ee718aa9375c9b 
Mon Sep 17 00:00:00 2001 From: Jason Tang Date: Wed, 28 Jul 2021 17:18:21 -0400 Subject: [PATCH 092/102] SWDEV-296911 - Enable clgl interop for both MesaGL and OrcaGL Change-Id: Ie3ad85a8335b1fc751812c09bb0cd30aad38dcae --- cmake/ROCclr.cmake | 2 +- device/rocm/rocmemory.cpp | 9 ++++----- device/rocm/rocsettings.cpp | 2 -- 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/cmake/ROCclr.cmake b/cmake/ROCclr.cmake index 9b1fced62..0eebda08a 100644 --- a/cmake/ROCclr.cmake +++ b/cmake/ROCclr.cmake @@ -96,7 +96,7 @@ else() option(BUILD_LINUXPRO "Build LinuxPro" OFF) if(BUILD_LINUXPRO) target_compile_definitions(rocclr PUBLIC - ROCCLR_DISABLE_PREVEGA ROCCLR_ENABLE_GL_SHARING) + ROCCLR_DISABLE_PREVEGA ) endif() endif() diff --git a/device/rocm/rocmemory.cpp b/device/rocm/rocmemory.cpp index 5a35d8503..f4e6ee1e8 100644 --- a/device/rocm/rocmemory.cpp +++ b/device/rocm/rocmemory.cpp @@ -269,13 +269,12 @@ bool Memory::createInteropBuffer(GLenum targetType, int miplevel) { if (status != HSA_STATUS_SUCCESS) return false; - //! 
@todo Need to handle metadata correctly -#if 0 - // if map_buffer wrote anything in metadata, copy it to amdImageDesc_ - if (metadata_size != 0) { + // if map_buffer wrote a legitimate SRD, copy it to amdImageDesc_ + if ((metadata_size != 0) && + (reinterpret_cast(metadata)->deviceID == + amdImageDesc_->deviceID)) { memcpy(amdImageDesc_, metadata, metadata_size); } -#endif //0 kind_ = MEMORY_KIND_INTEROP; assert(deviceMemory_ != nullptr && "Interop map failed to produce a pointer!"); diff --git a/device/rocm/rocsettings.cpp b/device/rocm/rocsettings.cpp index a1845caa9..17c940e3f 100644 --- a/device/rocm/rocsettings.cpp +++ b/device/rocm/rocsettings.cpp @@ -129,11 +129,9 @@ bool Settings::create(bool fullProfile, uint32_t gfxipMajor, uint32_t gfxipMinor enableExtension(ClAmdMediaOps2); enableExtension(ClKhrImage2dFromBuffer); -#ifdef ROCCLR_ENABLE_GL_SHARING if (MesaInterop::Supported()) { enableExtension(ClKhrGlSharing); } -#endif // Enable platform extension enableExtension(ClAmdDeviceAttributeQuery); From a70d777656a1d912cd816819b8d4e381eefd3a9b Mon Sep 17 00:00:00 2001 From: Rahul Garg Date: Thu, 29 Jul 2021 19:50:51 +0000 Subject: [PATCH 093/102] SWDEV-293742 - Remove external refs Change-Id: Ib9e25a6beb97cc042bb3cc50338686a8dd09e21c --- device/rocm/rocregisters.hpp | 7 ------- 1 file changed, 7 deletions(-) diff --git a/device/rocm/rocregisters.hpp b/device/rocm/rocregisters.hpp index 4fac38e85..fa05ff98e 100644 --- a/device/rocm/rocregisters.hpp +++ b/device/rocm/rocregisters.hpp @@ -18,13 +18,6 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/* -Definitions taken from Mesa radeonsi and GCN3 isa manual. -https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/radeonsi/sid.h -http://gpuopen.com/compute-product/amd-gcn3-isa-architecture-manual/ - -WORD7 is defined in mesa but has no fields and isn't in GCN3 doc. Can I use this? 
-*/ #pragma once #ifndef WITHOUT_HSA_BACKEND From 035733c71e5baabbbb6e812dbf67a758c41f04c7 Mon Sep 17 00:00:00 2001 From: Chauncey Hui Date: Fri, 30 Jul 2021 03:00:05 -0400 Subject: [PATCH 094/102] SWDEV-2 - Change OpenCL version number from 3345 to 3346 --- utils/versions.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/versions.hpp b/utils/versions.hpp index 7d8bb57b1..aaf6948d9 100644 --- a/utils/versions.hpp +++ b/utils/versions.hpp @@ -28,7 +28,7 @@ #endif // AMD_PLATFORM_NAME #ifndef AMD_PLATFORM_BUILD_NUMBER -#define AMD_PLATFORM_BUILD_NUMBER 3345 +#define AMD_PLATFORM_BUILD_NUMBER 3346 #endif // AMD_PLATFORM_BUILD_NUMBER #ifndef AMD_PLATFORM_REVISION_NUMBER From 0afd6e17cf66b700cedbda3247f1496cfa5ba692 Mon Sep 17 00:00:00 2001 From: Vladislav Sytchenko Date: Fri, 30 Jul 2021 12:02:19 -0400 Subject: [PATCH 095/102] SWDEV-273235 - Add ROCclrCompilerOptions cmake module This module will be used to add any specific compiler options to ROCclr and its clients. Currently it only adds a workaround to remove the MSVC flag /GR, which is added by default by CMake <3.20. This resolves the conflict of PAL adding /GR-. Change-Id: If83adb271bcec86812a6e9de940da3920fc75393 --- cmake/ROCclr.cmake | 2 ++ cmake/ROCclrCompilerOptions.cmake | 31 +++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+) create mode 100644 cmake/ROCclrCompilerOptions.cmake diff --git a/cmake/ROCclr.cmake b/cmake/ROCclr.cmake index 0eebda08a..fd09d38b1 100644 --- a/cmake/ROCclr.cmake +++ b/cmake/ROCclr.cmake @@ -42,6 +42,8 @@ find_package(AMD_OPENCL) add_library(rocclr STATIC) +include(ROCclrCompilerOptions) + set(ROCCLR_SRC_DIR "${CMAKE_CURRENT_LIST_DIR}/..") mark_as_advanced(ROCCLR_SRC_DIR) diff --git a/cmake/ROCclrCompilerOptions.cmake b/cmake/ROCclrCompilerOptions.cmake new file mode 100644 index 000000000..2dae3dab0 --- /dev/null +++ b/cmake/ROCclrCompilerOptions.cmake @@ -0,0 +1,31 @@ +# Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc.
All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +include_guard() + +if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") + if (CMAKE_VERSION VERSION_LESS "3.20") + # This code is necessary to avoid this command line warning: + # "Overriding /GR with /GR- cl: command line warning D9025" + # + # /GR is implied by MSVC anyway. So getting rid of it doesn't matter. + string(REPLACE "/GR" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}) + endif() +endif() From ab15da439df75ed427d14b64c6346ab049a92a7c Mon Sep 17 00:00:00 2001 From: Vladislav Sytchenko Date: Fri, 30 Jul 2021 12:45:11 -0400 Subject: [PATCH 096/102] SWDEV-283981 - [PAL] Support hostcall SQ interrupt Note that this requires base driver CL#2340320+ to have SQ interrupt functionality enabled by default.
Change-Id: I04b936819ebe1eb7cf5de1db4fafe83af3a1b5f6 --- device/devhostcall.cpp | 6 ++-- device/pal/palsignal.cpp | 70 +++++++++++++++++++++++++++++++++++++++- device/pal/palsignal.hpp | 3 ++ 3 files changed, 75 insertions(+), 4 deletions(-) diff --git a/device/devhostcall.cpp b/device/devhostcall.cpp index a73c4451b..c49ecc012 100644 --- a/device/devhostcall.cpp +++ b/device/devhostcall.cpp @@ -335,10 +335,10 @@ void HostcallListener::removeBuffer(HostcallBuffer* buffer) { bool HostcallListener::initialize(const amd::Device &dev) { doorbell_ = dev.createSignal(); -#ifdef WITH_HSA_DEVICE - auto ws = device::Signal::WaitState::Blocked; -#else +#if defined(WITH_PAL_DEVICE) && !defined(_WIN32) auto ws = device::Signal::WaitState::Active; +#else + auto ws = device::Signal::WaitState::Blocked; #endif if ((doorbell_ == nullptr) || !doorbell_->Init(dev, SIGNAL_INIT, ws)) { return false; diff --git a/device/pal/palsignal.cpp b/device/pal/palsignal.cpp index 4f7ac33cc..076464741 100644 --- a/device/pal/palsignal.cpp +++ b/device/pal/palsignal.cpp @@ -29,6 +29,21 @@ namespace pal { Signal::~Signal() { dev_->context().svmFree(amdSignal_); + + if (ws_ == device::Signal::WaitState::Blocked) { +#if defined(_WIN32) + Pal::Result result = Pal::Result::Success; + + Pal::UnregisterEventInfo eventInfo = {}; + eventInfo.pEvent = &event_; + eventInfo.trackingType = Pal::EventTrackingType::ShaderInterrupt; + result = dev_->iDev()->UnregisterEvent(eventInfo); + if (result != Pal::Result::Success) { + ClPrint(amd::LOG_ERROR, amd::LOG_QUEUE, + "Failed to unregister SQ event needed for hostcall buffer"); + } +#endif + } } bool Signal::Init(const amd::Device& dev, uint64_t init, device::Signal::WaitState ws) { @@ -47,6 +62,47 @@ bool Signal::Init(const amd::Device& dev, uint64_t init, device::Signal::WaitSta amdSignal_ = new (buffer) amd_signal_t(); amdSignal_->value = init; + if (ws_ == device::Signal::WaitState::Blocked) { +#if defined(_WIN32) + Pal::Result result = Pal::Result::Success; + 
+ Util::EventCreateFlags flags = {}; + flags.manualReset = false; + flags.initiallySignaled = false; + result = event_.Init(flags); + if (result != Pal::Result::Success) { + ClPrint(amd::LOG_ERROR, amd::LOG_QUEUE, + "Failed to create Pal::Util::Event needed for hostcall buffer"); + return false; + } + + result = event_.Set(); + if (result != Pal::Result::Success) { + ClPrint(amd::LOG_ERROR, amd::LOG_QUEUE, + "Failed to set Pal::Util::Event needed for hostcall buffer"); + return false; + } + + Pal::RegisterEventInfo eventInputInfo = {}; + eventInputInfo.pEvent = &event_; + eventInputInfo.trackingType = Pal::EventTrackingType::ShaderInterrupt; + Pal::RegisterEventOutputInfo eventOutputInfo = {}; + result = dev_->iDev()->RegisterEvent( + eventInputInfo, + &eventOutputInfo); + if (result != Pal::Result::Success) { + ClPrint(amd::LOG_ERROR, amd::LOG_QUEUE, + "Failed to register SQ event needed for hostcall buffer"); + return false; + } + amdSignal_->event_id = eventOutputInfo.shaderInterrupt.eventId; + amdSignal_->event_mailbox_ptr = eventOutputInfo.shaderInterrupt.eventMailboxGpuVa; + ClPrint(amd::LOG_INFO, amd::LOG_INIT, + "Registered SQ event %d with mailbox slot %p", + amdSignal_->event_id, amdSignal_->event_mailbox_ptr); +#endif + } + return true; } @@ -67,7 +123,19 @@ uint64_t Signal::Wait(uint64_t value, device::Signal::Condition c, uint64_t time } (c); if (ws_ == device::Signal::WaitState::Blocked) { - guarantee(false, "Unimplemented"); +#if defined(_WIN32) + Pal::Result result = Pal::Result::Success; + + float timeoutInSec = timeout / (1000 * 1000); + result = event_.Wait(timeoutInSec); + + if (result != Pal::Result::Success) { + return -1; + } + + std::atomic_thread_fence(std::memory_order_acquire); + return amdSignal_->value; +#endif } else if (ws_ == device::Signal::WaitState::Active) { auto start = amd::Os::timeNanos(); while (true) { diff --git a/device/pal/palsignal.hpp b/device/pal/palsignal.hpp index d043c6a92..781cbd6b1 100644 --- 
a/device/pal/palsignal.hpp +++ b/device/pal/palsignal.hpp @@ -24,6 +24,8 @@ #include +#include "palEvent.h" + namespace pal { class Device; @@ -32,6 +34,7 @@ class Signal: public device::Signal { private: const Device* dev_; amd_signal_t* amdSignal_; + Util::Event event_; public: ~Signal() override; From 152a816add6b5e1566f282287406a4ae6083f329 Mon Sep 17 00:00:00 2001 From: Chauncey Hui Date: Sat, 31 Jul 2021 03:00:06 -0400 Subject: [PATCH 097/102] SWDEV-2 - Change OpenCL version number from 3346 to 3347 --- utils/versions.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/versions.hpp b/utils/versions.hpp index aaf6948d9..26523e4f5 100644 --- a/utils/versions.hpp +++ b/utils/versions.hpp @@ -28,7 +28,7 @@ #endif // AMD_PLATFORM_NAME #ifndef AMD_PLATFORM_BUILD_NUMBER -#define AMD_PLATFORM_BUILD_NUMBER 3346 +#define AMD_PLATFORM_BUILD_NUMBER 3347 #endif // AMD_PLATFORM_BUILD_NUMBER #ifndef AMD_PLATFORM_REVISION_NUMBER From 613cf58fafa5bb5d3d9b49b545a81fc7b64a0ef7 Mon Sep 17 00:00:00 2001 From: anusha GodavarthySurya Date: Fri, 30 Jul 2021 09:02:22 -0700 Subject: [PATCH 098/102] SWDEV-297215 - Set image descriptor as per the HSA specification for hsa_ext_image_descriptor_t Change-Id: I0af0f09120f15a42349ec4de491df8aee7bfd46d --- device/rocm/rocmemory.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/device/rocm/rocmemory.cpp b/device/rocm/rocmemory.cpp index f4e6ee1e8..2182111c8 100644 --- a/device/rocm/rocmemory.cpp +++ b/device/rocm/rocmemory.cpp @@ -1005,28 +1005,28 @@ void Image::populateImageDescriptor() { switch (image->getType()) { case CL_MEM_OBJECT_IMAGE1D: imageDescriptor_.geometry = HSA_EXT_IMAGE_GEOMETRY_1D; - imageDescriptor_.height = 1; - imageDescriptor_.depth = 1; + imageDescriptor_.height = 0; + imageDescriptor_.depth = 0; break; case CL_MEM_OBJECT_IMAGE1D_BUFFER: imageDescriptor_.geometry = HSA_EXT_IMAGE_GEOMETRY_1DB; - imageDescriptor_.height = 1; - imageDescriptor_.depth = 1; + 
imageDescriptor_.height = 0; + imageDescriptor_.depth = 0; break; case CL_MEM_OBJECT_IMAGE1D_ARRAY: //@todo - arraySize = height ?! imageDescriptor_.geometry = HSA_EXT_IMAGE_GEOMETRY_1DA; - imageDescriptor_.height = 1; + imageDescriptor_.height = 0; imageDescriptor_.array_size = image->getHeight(); break; case CL_MEM_OBJECT_IMAGE2D: imageDescriptor_.geometry = HSA_EXT_IMAGE_GEOMETRY_2D; - imageDescriptor_.depth = 1; + imageDescriptor_.depth = 0; break; case CL_MEM_OBJECT_IMAGE2D_ARRAY: //@todo - arraySize = depth ?! imageDescriptor_.geometry = HSA_EXT_IMAGE_GEOMETRY_2DA; - imageDescriptor_.depth = 1; + imageDescriptor_.depth = 0; imageDescriptor_.array_size = image->getDepth(); break; case CL_MEM_OBJECT_IMAGE3D: From 100ddb528883d28b644fabb8117823f34a757599 Mon Sep 17 00:00:00 2001 From: Chauncey Hui Date: Mon, 2 Aug 2021 03:00:06 -0400 Subject: [PATCH 099/102] SWDEV-2 - Change OpenCL version number from 3347 to 3348 --- utils/versions.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/versions.hpp b/utils/versions.hpp index 26523e4f5..cd5f12da2 100644 --- a/utils/versions.hpp +++ b/utils/versions.hpp @@ -28,7 +28,7 @@ #endif // AMD_PLATFORM_NAME #ifndef AMD_PLATFORM_BUILD_NUMBER -#define AMD_PLATFORM_BUILD_NUMBER 3347 +#define AMD_PLATFORM_BUILD_NUMBER 3348 #endif // AMD_PLATFORM_BUILD_NUMBER #ifndef AMD_PLATFORM_REVISION_NUMBER From d14ee4456575c06bf07c5c6401d782ed8b6a7383 Mon Sep 17 00:00:00 2001 From: German Andryeyev Date: Fri, 30 Jul 2021 18:07:34 -0400 Subject: [PATCH 100/102] SWDEV-296329 - Add lock protection for Timestamp update There is a possible race condition when signal reuse can have access to a destroyed Timestamp object, because the callback was running asynchronously. 
Use reference counter and lock to allow asynchronous timestamp update Change-Id: I6224f7c62cb0a03a7466fcc512e5e5afb06736fa --- device/rocm/rocvirtual.cpp | 34 ++++++++++++++++++---------------- device/rocm/rocvirtual.hpp | 6 ++++-- 2 files changed, 22 insertions(+), 18 deletions(-) diff --git a/device/rocm/rocvirtual.cpp b/device/rocm/rocvirtual.cpp index 9cde1f30a..db803164a 100644 --- a/device/rocm/rocvirtual.cpp +++ b/device/rocm/rocvirtual.cpp @@ -110,6 +110,7 @@ static unsigned extractAqlBits(unsigned v, unsigned pos, unsigned width) { // ================================================================================================ void Timestamp::checkGpuTime() { + amd::ScopedLock s(lock_); if (HwProfiling()) { uint64_t start = std::numeric_limits::max(); uint64_t end = 0; @@ -140,7 +141,6 @@ void Timestamp::checkGpuTime() { ClPrint(amd::LOG_INFO, amd::LOG_SIG, "Signal = (0x%lx), start = %ld, " "end = %ld", it->signal_.handle, start, end); } - it->ts_ = nullptr; it->done_ = true; } signals_.clear(); @@ -399,6 +399,7 @@ hsa_signal_t VirtualGPU::HwQueueTracker::ActiveSignal( if (ts != 0) { // Save HSA signal earlier to make sure the possible callback will have a valid // value for processing + ts->retain(); prof_signal->ts_ = ts; ts->AddProfilingSignal(prof_signal); // If direct dispatch is enabled and the batch head isn't null, then it's a marker and @@ -437,7 +438,7 @@ hsa_signal_t VirtualGPU::HwQueueTracker::ActiveSignal( // ================================================================================================ std::vector& VirtualGPU::HwQueueTracker::WaitingSignal(HwQueueEngine engine) { bool explicit_wait = false; - // Rest all current waiting signals + // Reset all current waiting signals waiting_signals_.clear(); // Does runtime switch the active engine? 
@@ -499,21 +500,22 @@ std::vector& VirtualGPU::HwQueueTracker::WaitingSignal(HwQueueEngi // ================================================================================================ bool VirtualGPU::HwQueueTracker::CpuWaitForSignal(ProfilingSignal* signal) { - amd::ScopedLock lock(signal->LockSignalOps()); // Wait for the current signal - if (!signal->done_) { + if (signal->ts_ != nullptr) { // Update timestamp values if requested - if (signal->ts_ != nullptr) { - signal->ts_->checkGpuTime(); - } else { - ClPrint(amd::LOG_DEBUG, amd::LOG_COPY, "[%zx]!\t Host wait on completion_signal=0x%zx", - std::this_thread::get_id(), signal->signal_.handle); - if (!WaitForSignal(signal->signal_, gpu_.ActiveWait())) { - LogPrintfError("Failed signal [0x%lx] wait", signal->signal_); - return false; - } - signal->done_ = true; + auto ts = signal->ts_; + ts->checkGpuTime(); + ts->release(); + signal->ts_ = nullptr; + } else if (!signal->done_) { + amd::ScopedLock lock(signal->LockSignalOps()); + ClPrint(amd::LOG_DEBUG, amd::LOG_COPY, "[%zx]!\t Host wait on completion_signal=0x%zx", + std::this_thread::get_id(), signal->signal_.handle); + if (!WaitForSignal(signal->signal_, gpu_.ActiveWait())) { + LogPrintfError("Failed signal [0x%lx] wait", signal->signal_); + return false; } + signal->done_ = true; } return true; } @@ -1079,7 +1081,7 @@ VirtualGPU::~VirtualGPU() { releasePinnedMem(); if (timestamp_ != nullptr) { - delete timestamp_; + timestamp_->release(); timestamp_ = nullptr; LogError("There was a timestamp that was not used; deleting."); } @@ -1315,7 +1317,7 @@ void VirtualGPU::updateCommandsState(amd::Command* list) const { ts = reinterpret_cast(current->data()); startTimeStamp = ts->getStart(); endTimeStamp = ts->getEnd(); - delete ts; + ts->release(); current->setData(nullptr); } else { // If we don't have a command that contains a valid timestamp, diff --git a/device/rocm/rocvirtual.hpp b/device/rocm/rocvirtual.hpp index 0d5b41010..f1efa435c 100644 --- 
a/device/rocm/rocvirtual.hpp +++ b/device/rocm/rocvirtual.hpp @@ -82,7 +82,7 @@ inline bool WaitForSignal(hsa_signal_t signal, bool active_wait = false) { // Timestamp for keeping track of some profiling information for various commands // including EnqueueNDRangeKernel and clEnqueueCopyBuffer. -class Timestamp : public amd::HeapObject { +class Timestamp : public amd::ReferenceCountedObject { private: static double ticksToTime_; @@ -93,6 +93,7 @@ class Timestamp : public amd::HeapObject { amd::Command* parsedCommand_; //!< Command down the list, considering command_ as head std::vector signals_; //!< The list of all signals, associated with the TS hsa_signal_t callback_signal_; //!< Signal associated with a callback for possible later update + amd::Monitor lock_; //!< Serialize timestamp update Timestamp(const Timestamp&) = delete; Timestamp& operator=(const Timestamp&) = delete; @@ -104,7 +105,8 @@ class Timestamp : public amd::HeapObject { , gpu_(gpu) , command_(command) , parsedCommand_(nullptr) - , callback_signal_(hsa_signal_t{}) {} + , callback_signal_(hsa_signal_t{}) + , lock_("Timestamp lock", true) {} ~Timestamp() {} From 96724e055997473b36c74a0516d1e5ad9e1ca959 Mon Sep 17 00:00:00 2001 From: Chauncey Hui Date: Tue, 3 Aug 2021 03:00:08 -0400 Subject: [PATCH 101/102] SWDEV-2 - Change OpenCL version number from 3348 to 3349 --- utils/versions.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/versions.hpp b/utils/versions.hpp index cd5f12da2..433c6015f 100644 --- a/utils/versions.hpp +++ b/utils/versions.hpp @@ -28,7 +28,7 @@ #endif // AMD_PLATFORM_NAME #ifndef AMD_PLATFORM_BUILD_NUMBER -#define AMD_PLATFORM_BUILD_NUMBER 3348 +#define AMD_PLATFORM_BUILD_NUMBER 3349 #endif // AMD_PLATFORM_BUILD_NUMBER #ifndef AMD_PLATFORM_REVISION_NUMBER From 8243fe4ca68ab54306cd1a218ff13c20eaf85551 Mon Sep 17 00:00:00 2001 From: Dennis Schridde Date: Mon, 6 Sep 2021 12:07:47 +0200 Subject: [PATCH 102/102] Fix OpenCL headers 2021.04.29 compatibility 
OpenCL headers 2021.04.29 moved `CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR` into a different header. (See issue referenced below for details.) See-also: https://github.com/KhronosGroup/OpenCL-Headers/issues/145 See-also: https://bugs.gentoo.org/790164 --- platform/command.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/platform/command.hpp b/platform/command.hpp index 47bc74ac9..838737a53 100644 --- a/platform/command.hpp +++ b/platform/command.hpp @@ -44,7 +44,8 @@ #include "platform/activity.hpp" #include "platform/command_utils.hpp" -#include "CL/cl_ext.h" +#include +#include #include #include