From 44773055a2fed79e8e801411254c0c49f53d3527 Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Mon, 5 May 2025 14:55:09 -0400 Subject: [PATCH 1/4] ci: use self-hosted runners --- .buildkite/pipeline.yml | 141 --------------------------------------- .github/workflows/CI.yml | 85 ++++++++++++++++++++++- 2 files changed, 84 insertions(+), 142 deletions(-) delete mode 100644 .buildkite/pipeline.yml diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml deleted file mode 100644 index f82d59b522..0000000000 --- a/.buildkite/pipeline.yml +++ /dev/null @@ -1,141 +0,0 @@ -steps: - - group: ":test_tube: Tests" - steps: - - label: ":julia: :linux: CUDA Julia v{{matrix.version}} -- {{matrix.group}} -- {{matrix.runtime}}" - matrix: - setup: - version: - - "1.10" - group: - - core - - neural_networks - - integration - runtime: - - "PJRT" - - "IFRT" - plugins: - - JuliaCI/julia#v1: - version: "{{matrix.version}}" - - JuliaCI/julia-coverage#v1: - codecov: true - dirs: - - src - - ext - - lib/ReactantCore/src - commands: | - touch LocalPreferences.toml - - echo "[Reactant]" >> LocalPreferences.toml - echo "xla_runtime = \"{{matrix.runtime}}\"" >> LocalPreferences.toml - - cat LocalPreferences.toml - - julia --project=. -e 'println("--- :julia: Instantiating project") - using Pkg - Pkg.develop([PackageSpec(path="lib/ReactantCore")])' - - julia --project=. 
-e 'println("--- :julia: Run Tests") - using Pkg - Pkg.test(; coverage="user")' - agents: - queue: "juliagpu" - cuda: "*" - env: - REACTANT_TEST_GROUP: "{{matrix.group}}" - JULIA_DEBUG: "Reactant,Reactant_jll" - CUDA_VISIBLE_DEVICES: 0 - REACTANT_BACKEND_GROUP: "GPU" - if: build.message !~ /\[skip tests\]/ - timeout_in_minutes: 120 - - # - group: ":racehorse: Benchmarks" - # steps: - # - label: "CPU: Run Benchmarks" - # plugins: - # - JuliaCI/julia#v1: - # version: "1" - # command: | - # julia --project=benchmark -e 'println("--- :julia: Instantiating project") - # using Pkg - # Pkg.develop([PackageSpec(path=pwd()), PackageSpec(path="lib/ReactantCore")])' - - # julia --project=benchmark -e 'println("--- :julia: Run Benchmarks") - # include("benchmark/runbenchmarks.jl")' - # artifact_paths: - # - "benchmark/results/*" - # agents: - # # Models are quite large so we need a decent sized machine. Don't tell Chris we - # # are stealing SciMLBenchmarks machine :P - # queue: "juliaecosystem" - # sandbox_capable: true - # exclusive: true - # arch: "x86_64" - # env: - # BENCHMARK_GROUP: CPU - # JULIA_NUM_THREADS: "auto" - # timeout_in_minutes: 120 - - # - label: "CUDA: Run Benchmarks" - # plugins: - # - JuliaCI/julia#v1: - # version: "1" - # command: | - # julia --project=benchmark -e 'println("--- :julia: Instantiating project") - # using Pkg - # Pkg.develop([PackageSpec(path=pwd()), PackageSpec(path="lib/ReactantCore")])' - - # julia --project=benchmark -e 'println("--- :julia: Run Benchmarks") - # include("benchmark/runbenchmarks.jl")' - # artifact_paths: - # - "benchmark/results/*" - # agents: - # queue: "benchmark" - # gpu: "rtx4070" - # cuda: "*" - # env: - # BENCHMARK_GROUP: CUDA - # JULIA_NUM_THREADS: "auto" - # timeout_in_minutes: 120 - - # - wait: ~ - # continue_on_failure: true - - # - label: "Combine benchmarks" - # plugins: - # - JuliaCI/julia#v1: - # version: "1" - # command: | - # buildkite-agent artifact download "benchmark/results/*" . 
- - # julia -e 'println("--- :julia: Instantiating project") - # using Pkg - # Pkg.add("BenchmarkTools") - - # println("--- :julia: Combining Benchmarks") - # include("benchmark/aggregate.jl")' - # artifact_paths: - # - "benchmark/results/combinedbenchmarks.json" - # agents: - # queue: "juliagpu" - # timeout_in_minutes: 10 - - # - label: "AMDGPU Julia v{{matrix.version}}" - # matrix: - # setup: - # version: - # - "1.10" - # plugins: - # - JuliaCI/julia#v1: - # version: "{{matrix.version}}" - # - JuliaCI/julia-test#v1: - # test_args: "--gpu" - # - JuliaCI/julia-coverage#v1: - # codecov: true - # dirs: - # - src - # - ext - # agents: - # queue: "juliagpu" - # rocm: "*" - # if: build.message !~ /\[skip tests\]/ - # timeout_in_minutes: 60 diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 0ac6d24eea..2eab684801 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -29,7 +29,7 @@ concurrency: cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }} jobs: - test: + test-cpu: timeout-minutes: 90 name: Julia ${{ matrix.version }} - ${{ matrix.test_group }} - ${{ matrix.os }} - ${{ matrix.runtime }} - assertions=${{ matrix.assertions }} - ${{ github.event_name }} runs-on: ${{ matrix.os }} @@ -160,3 +160,86 @@ jobs: - uses: codecov/codecov-action@v5 with: files: lcov.info + + test-cuda: + timeout-minutes: 90 + name: Julia ${{ matrix.version }} - ${{ matrix.test_group }} - linux - ${{ matrix.runtime }} - cuda - ${{ github.event_name }} + runs-on: [self-hosted, linux, x64, cuda] + strategy: + fail-fast: false + matrix: + version: + - "1.10" + - "1.11" + test_group: + - core + - neural_networks + - integration + runtime: + - "PJRT" + - "IFRT" + env: + TMPDIR: ${{ github.workspace }}/tmp + steps: + - uses: actions/checkout@v4 + - name: Create TMPDIR + run: | + mkdir -p ${{ env.TMPDIR }} + - uses: julia-actions/setup-julia@v2 + with: + version: ${{ matrix.version }} + - uses: julia-actions/cache@v2 + - name: "Setup Runtime Preferences" 
+ uses: "DamianReeves/write-file-action@master" + with: + path: "LocalPreferences.toml" + write-mode: "overwrite" + contents: | + [Reactant] + xla_runtime = "${{ matrix.runtime }}" + - name: "Install Dependencies" + run: | + import Pkg + Pkg.Registry.update() + # Install packages present in subdirectories + dev_pks = Pkg.PackageSpec[] + for path in ("lib/ReactantCore",) + push!(dev_pks, Pkg.PackageSpec(; path)) + end + Pkg.develop(dev_pks) + shell: julia --color=yes --code-coverage=user --depwarn=yes --project=. {0} + # Only in Julia v1.10 we need to install `ReactantCore` manually. + if: ${{ matrix.version == '1.10' }} + env: + JULIA_PKG_SERVER_REGISTRY_PREFERENCE: eager + LD_LIBRARY_PATH: "" + - name: "Run Tests" + timeout-minutes: 60 + run: | + import Pkg + Pkg.Registry.update() + Pkg.test(; coverage="user") + shell: julia --color=yes --code-coverage=user --depwarn=yes --project=. {0} + id: run_tests + env: + JULIA_PKG_SERVER_REGISTRY_PREFERENCE: eager + ENABLE_PJRT_COMPATIBILITY: 1 + REACTANT_TEST_GROUP: ${{ matrix.test_group }} + XLA_FLAGS: "--xla_force_host_platform_device_count=12" + JULIA_DEBUG: "Reactant,Reactant_jll" + REACTANT_BACKEND_GROUP: "GPU" + LD_LIBRARY_PATH: "" + XLA_REACTANT_GPU_MEM_FRACTION: "0.25" # our runners are pretty big + - name: Upload MLIR modules + uses: actions/upload-artifact@v4 + timeout-minutes: 10 + if: ${{ always() }} + with: + name: "mlir-${{ matrix.version }}-${{ matrix.test_group }}-linux-${{ matrix.runtime }}-cuda-${{ github.event_name }}" + path: "**/*.mlir" + retention-days: 90 + overwrite: false + - uses: julia-actions/julia-processcoverage@v1 + - uses: codecov/codecov-action@v5 + with: + files: lcov.info From a6a1ba3323719a377b6f2756e43ab492345cf5c3 Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Tue, 6 May 2025 10:50:09 -0400 Subject: [PATCH 2/4] ci: set LD_PRELOAD --- .github/workflows/CI.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 2eab684801..849d78d59a 
100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -230,6 +230,7 @@ jobs: REACTANT_BACKEND_GROUP: "GPU" LD_LIBRARY_PATH: "" XLA_REACTANT_GPU_MEM_FRACTION: "0.25" # our runners are pretty big + LD_PRELOAD: "" - name: Upload MLIR modules uses: actions/upload-artifact@v4 timeout-minutes: 10 From 553aece8e966c07e0a5df90609460d122192f4a7 Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Tue, 6 May 2025 11:18:02 -0400 Subject: [PATCH 3/4] ci: use a common workflow --- .github/workflows/CI.yml | 158 +++------------------------------ .github/workflows/CICommon.yml | 99 +++++++++++++++++++++ 2 files changed, 113 insertions(+), 144 deletions(-) create mode 100644 .github/workflows/CICommon.yml diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 849d78d59a..c04360049b 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -30,9 +30,7 @@ concurrency: jobs: test-cpu: - timeout-minutes: 90 name: Julia ${{ matrix.version }} - ${{ matrix.test_group }} - ${{ matrix.os }} - ${{ matrix.runtime }} - assertions=${{ matrix.assertions }} - ${{ github.event_name }} - runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: @@ -86,85 +84,16 @@ jobs: # libReactant: packaged # version: '1.10' # test_group: integration - env: - TMPDIR: ${{ github.workspace }}/tmp - steps: - - uses: actions/checkout@v4 - - name: Create TMPDIR - run: | - mkdir -p ${{ env.TMPDIR }} - - uses: julia-actions/setup-julia@v2 - if: ${{ ! 
matrix.assertions }} - with: - version: ${{ matrix.version }} - - uses: julia-actions/cache@v2 - - uses: actions/checkout@v4 - if: ${{ matrix.assertions }} - with: - repository: "JuliaLang/julia" - ref: release-${{ matrix.version }} - path: "julia" - - name: Compile Julia - if: ${{ matrix.assertions }} - run: | - sed -i.bak 's/exit 2/exit 0/g' julia/deps/tools/jlchecksum - make -C julia -j $(nproc) FORCE_ASSERTIONS=1 LLVM_ASSERTIONS=1 JULIA_PRECOMPILE=0 - echo $PWD/julia/usr/bin >> $GITHUB_PATH - - name: "Setup Runtime Preferences" - uses: "DamianReeves/write-file-action@master" - with: - path: "LocalPreferences.toml" - write-mode: "overwrite" - contents: | - [Reactant] - xla_runtime = "${{ matrix.runtime }}" - - name: "Install Dependencies" - run: | - import Pkg - Pkg.Registry.update() - # Install packages present in subdirectories - dev_pks = Pkg.PackageSpec[] - for path in ("lib/ReactantCore",) - push!(dev_pks, Pkg.PackageSpec(; path)) - end - Pkg.develop(dev_pks) - shell: julia --color=yes --code-coverage=user --depwarn=yes --project=. {0} - # Only in Julia v1.10 we need to install `ReactantCore` manually. - if: ${{ matrix.version == '1.10' }} - env: - JULIA_PKG_SERVER_REGISTRY_PREFERENCE: eager - - name: "Run Tests" - timeout-minutes: 60 - run: | - import Pkg - Pkg.Registry.update() - Pkg.test(; coverage="user") - shell: julia --color=yes --code-coverage=user --depwarn=yes --project=. 
{0} - id: run_tests - env: - JULIA_PKG_SERVER_REGISTRY_PREFERENCE: eager - ENABLE_PJRT_COMPATIBILITY: 1 - REACTANT_TEST_GROUP: ${{ matrix.test_group }} - XLA_FLAGS: "--xla_force_host_platform_device_count=12" - JULIA_DEBUG: "Reactant,Reactant_jll" - - name: Upload MLIR modules - uses: actions/upload-artifact@v4 - timeout-minutes: 10 - if: ${{ always() }} - with: - name: "mlir-${{ matrix.version }}-${{ matrix.test_group }}-${{ matrix.os }}-${{ matrix.runtime }}-assertions=${{ matrix.assertions }}-${{ github.event_name }}" - path: "**/*.mlir" - retention-days: 90 - overwrite: false - - uses: julia-actions/julia-processcoverage@v1 - - uses: codecov/codecov-action@v5 - with: - files: lcov.info + uses: ./.github/workflows/CICommon.yml + with: + julia_version: ${{ matrix.version }} + runner: ${{ matrix.os }} + xla_runtime: ${{ matrix.runtime }} + assertions: ${{ matrix.assertions }} + test_group: ${{ matrix.test_group }} test-cuda: - timeout-minutes: 90 name: Julia ${{ matrix.version }} - ${{ matrix.test_group }} - linux - ${{ matrix.runtime }} - cuda - ${{ github.event_name }} - runs-on: [self-hosted, linux, x64, cuda] strategy: fail-fast: false matrix: @@ -178,69 +107,10 @@ jobs: runtime: - "PJRT" - "IFRT" - env: - TMPDIR: ${{ github.workspace }}/tmp - steps: - - uses: actions/checkout@v4 - - name: Create TMPDIR - run: | - mkdir -p ${{ env.TMPDIR }} - - uses: julia-actions/setup-julia@v2 - with: - version: ${{ matrix.version }} - - uses: julia-actions/cache@v2 - - name: "Setup Runtime Preferences" - uses: "DamianReeves/write-file-action@master" - with: - path: "LocalPreferences.toml" - write-mode: "overwrite" - contents: | - [Reactant] - xla_runtime = "${{ matrix.runtime }}" - - name: "Install Dependencies" - run: | - import Pkg - Pkg.Registry.update() - # Install packages present in subdirectories - dev_pks = Pkg.PackageSpec[] - for path in ("lib/ReactantCore",) - push!(dev_pks, Pkg.PackageSpec(; path)) - end - Pkg.develop(dev_pks) - shell: julia --color=yes 
--code-coverage=user --depwarn=yes --project=. {0} - # Only in Julia v1.10 we need to install `ReactantCore` manually. - if: ${{ matrix.version == '1.10' }} - env: - JULIA_PKG_SERVER_REGISTRY_PREFERENCE: eager - LD_LIBRARY_PATH: "" - - name: "Run Tests" - timeout-minutes: 60 - run: | - import Pkg - Pkg.Registry.update() - Pkg.test(; coverage="user") - shell: julia --color=yes --code-coverage=user --depwarn=yes --project=. {0} - id: run_tests - env: - JULIA_PKG_SERVER_REGISTRY_PREFERENCE: eager - ENABLE_PJRT_COMPATIBILITY: 1 - REACTANT_TEST_GROUP: ${{ matrix.test_group }} - XLA_FLAGS: "--xla_force_host_platform_device_count=12" - JULIA_DEBUG: "Reactant,Reactant_jll" - REACTANT_BACKEND_GROUP: "GPU" - LD_LIBRARY_PATH: "" - XLA_REACTANT_GPU_MEM_FRACTION: "0.25" # our runners are pretty big - LD_PRELOAD: "" - - name: Upload MLIR modules - uses: actions/upload-artifact@v4 - timeout-minutes: 10 - if: ${{ always() }} - with: - name: "mlir-${{ matrix.version }}-${{ matrix.test_group }}-linux-${{ matrix.runtime }}-cuda-${{ github.event_name }}" - path: "**/*.mlir" - retention-days: 90 - overwrite: false - - uses: julia-actions/julia-processcoverage@v1 - - uses: codecov/codecov-action@v5 - with: - files: lcov.info + uses: ./.github/workflows/CICommon.yml + with: + julia_version: ${{ matrix.version }} + runner: cuda + xla_runtime: ${{ matrix.runtime }} + assertions: false + test_group: ${{ matrix.test_group }} diff --git a/.github/workflows/CICommon.yml b/.github/workflows/CICommon.yml new file mode 100644 index 0000000000..c6896e3456 --- /dev/null +++ b/.github/workflows/CICommon.yml @@ -0,0 +1,99 @@ +name: "CI Common" + +on: + workflow_call: + inputs: + julia_version: + description: 'Julia version' + required: true + type: string + runner: + description: 'OS/Runner' + required: true + type: string + xla_runtime: + description: 'The XLA runtime' + required: true + type: string + assertions: + description: 'Enable assertions' + required: false + default: false + type: boolean 
+ test_group: + description: 'Test group' + required: true + type: string + +jobs: + test: + name: Julia ${{ inputs.julia_version }} - ${{ inputs.runner }} - ${{ inputs.xla_runtime }} - ${{ github.event_name }} + runs-on: ${{ inputs.runner }} + env: + TMPDIR: ${{ github.workspace }}/tmp + JULIA_PKG_SERVER_REGISTRY_PREFERENCE: eager + JULIA_DEBUG: "Reactant,Reactant_jll" + REACTANT_TEST_GROUP: ${{ inputs.test_group }} + XLA_FLAGS: "--xla_force_host_platform_device_count=12" + steps: + - uses: actions/checkout@v4 + - name: Create TMPDIR + run: | + mkdir -p ${{ env.TMPDIR }} + - uses: julia-actions/setup-julia@v2 + with: + version: ${{ inputs.julia_version }} + - uses: julia-actions/cache@v2 + - uses: actions/checkout@v4 + if: ${{ inputs.assertions }} + with: + repository: "JuliaLang/julia" + ref: release-${{ inputs.julia_version }} + path: "julia" + - name: Compile Julia + if: ${{ inputs.assertions }} + run: | + sed -i.bak 's/exit 2/exit 0/g' julia/deps/tools/jlchecksum + make -C julia -j $(nproc) FORCE_ASSERTIONS=1 LLVM_ASSERTIONS=1 JULIA_PRECOMPILE=0 + echo $PWD/julia/usr/bin >> $GITHUB_PATH + - name: "Setup Runtime Preferences" + uses: "DamianReeves/write-file-action@master" + with: + path: "LocalPreferences.toml" + write-mode: "overwrite" + contents: | + [Reactant] + xla_runtime = "${{ inputs.xla_runtime }}" + - name: "Install Dependencies" + run: | + import Pkg + Pkg.Registry.update() + # Install packages present in subdirectories + dev_pks = Pkg.PackageSpec[] + for path in ("lib/ReactantCore",) + push!(dev_pks, Pkg.PackageSpec(; path)) + end + Pkg.develop(dev_pks) + shell: julia --color=yes --code-coverage=user --depwarn=yes --project=. {0} + # Only in Julia v1.10 we need to install `ReactantCore` manually. + if: ${{ inputs.julia_version == '1.10' }} + - name: "Run Tests" + timeout-minutes: 60 + run: | + import Pkg + Pkg.Registry.update() + Pkg.test(; coverage="user") + shell: julia --color=yes --code-coverage=user --depwarn=yes --project=. 
{0} + id: run_tests + - name: Upload MLIR modules + uses: actions/upload-artifact@v4 + timeout-minutes: 10 + with: + name: "mlir-${{ inputs.julia_version }}-${{ inputs.test_group }}-${{ inputs.runner }}-${{ inputs.xla_runtime }}-assertions=${{ inputs.assertions }}-${{ github.event_name }}" + path: "**/*.mlir" + retention-days: 90 + overwrite: false + - uses: julia-actions/julia-processcoverage@v1 + - uses: codecov/codecov-action@v5 + with: + files: lcov.info From e80b67f69c70a266968d58fa780311a2ed403ad3 Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Tue, 6 May 2025 11:55:18 -0400 Subject: [PATCH 4/4] fix: updates --- .github/workflows/CICommon.yml | 1 + src/Compiler.jl | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/CICommon.yml b/.github/workflows/CICommon.yml index c6896e3456..469c84d63c 100644 --- a/.github/workflows/CICommon.yml +++ b/.github/workflows/CICommon.yml @@ -35,6 +35,7 @@ jobs: JULIA_DEBUG: "Reactant,Reactant_jll" REACTANT_TEST_GROUP: ${{ inputs.test_group }} XLA_FLAGS: "--xla_force_host_platform_device_count=12" + LD_LIBRARY_PATH: "" # use binaries from our JLLs steps: - uses: actions/checkout@v4 - name: Create TMPDIR diff --git a/src/Compiler.jl b/src/Compiler.jl index 3dca71d674..3c5f0b3f61 100644 --- a/src/Compiler.jl +++ b/src/Compiler.jl @@ -637,7 +637,7 @@ function optimization_passes(; "slice_dot_general<1>", "if_inline<1>", "if_to_select<1>", - "dynamic_update_slice_const_prop", + "dynamic_update_slice_const_prop(0)", "dynamic_gather_op_is_not_dynamic<16>", "divide_sqrt_to_multiply_rsqrt<16>", "associative_binary_op_reordering<1>",