From 9884fa3da65837d210c51c98643b02a2509605b2 Mon Sep 17 00:00:00 2001 From: David Rivera Date: Sat, 9 Aug 2025 14:35:33 -0400 Subject: [PATCH] [CIR][CIRGen][Builtin][X86] Lower avx512 `gather` intrinsics --- clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 91 ++++++++++- clang/test/CIR/CodeGen/X86/avx512f-builtins.c | 136 +++++++++++++++- .../test/CIR/CodeGen/X86/avx512vl-builtins.c | 145 +++++++++++++++++- 3 files changed, 368 insertions(+), 4 deletions(-) diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp index 38ba28ba7291..1ab8b393d50d 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp @@ -725,8 +725,95 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_gathersiv8di: case X86::BI__builtin_ia32_gathersiv16si: case X86::BI__builtin_ia32_gatherdiv8di: - case X86::BI__builtin_ia32_gatherdiv16si: - llvm_unreachable("gather3div2df NYI"); + case X86::BI__builtin_ia32_gatherdiv16si: { + StringRef intrinsicName; + switch (BuiltinID) { + default: + llvm_unreachable("Unexpected builtin"); + case X86::BI__builtin_ia32_gather3div2df: + intrinsicName = "x86.avx512.mask.gather3div2.df"; + break; + case X86::BI__builtin_ia32_gather3div2di: + intrinsicName = "x86.avx512.mask.gather3div2.di"; + break; + case X86::BI__builtin_ia32_gather3div4df: + intrinsicName = "x86.avx512.mask.gather3div4.df"; + break; + case X86::BI__builtin_ia32_gather3div4di: + intrinsicName = "x86.avx512.mask.gather3div4.di"; + break; + case X86::BI__builtin_ia32_gather3div4sf: + intrinsicName = "x86.avx512.mask.gather3div4.sf"; + break; + case X86::BI__builtin_ia32_gather3div4si: + intrinsicName = "x86.avx512.mask.gather3div4.si"; + break; + case X86::BI__builtin_ia32_gather3div8sf: + intrinsicName = "x86.avx512.mask.gather3div8.sf"; + break; + case X86::BI__builtin_ia32_gather3div8si: + intrinsicName = "x86.avx512.mask.gather3div8.si"; + break; + case X86::BI__builtin_ia32_gather3siv2df: + intrinsicName = "x86.avx512.mask.gather3siv2.df"; + break; + case X86::BI__builtin_ia32_gather3siv2di: + intrinsicName = "x86.avx512.mask.gather3siv2.di"; + break; + case X86::BI__builtin_ia32_gather3siv4df: + intrinsicName = "x86.avx512.mask.gather3siv4.df"; + break; + case X86::BI__builtin_ia32_gather3siv4di: + intrinsicName = "x86.avx512.mask.gather3siv4.di"; + break; + case X86::BI__builtin_ia32_gather3siv4sf: + intrinsicName = "x86.avx512.mask.gather3siv4.sf"; + break; + case X86::BI__builtin_ia32_gather3siv4si: + intrinsicName = "x86.avx512.mask.gather3siv4.si"; + break; + case X86::BI__builtin_ia32_gather3siv8sf: + intrinsicName = "x86.avx512.mask.gather3siv8.sf"; + break; + case X86::BI__builtin_ia32_gather3siv8si: + intrinsicName = "x86.avx512.mask.gather3siv8.si"; + break; + case X86::BI__builtin_ia32_gathersiv8df: + intrinsicName = "x86.avx512.mask.gather.dpd.512"; + break; + case X86::BI__builtin_ia32_gathersiv16sf: + intrinsicName = "x86.avx512.mask.gather.dps.512"; + break; + case X86::BI__builtin_ia32_gatherdiv8df: + intrinsicName = "x86.avx512.mask.gather.qpd.512"; + break; + case X86::BI__builtin_ia32_gatherdiv16sf: + intrinsicName = "x86.avx512.mask.gather.qps.512"; + break; + case X86::BI__builtin_ia32_gathersiv8di: + intrinsicName = "x86.avx512.mask.gather.dpq.512"; + break; + case X86::BI__builtin_ia32_gathersiv16si: + intrinsicName = "x86.avx512.mask.gather.dpi.512"; + break; + case X86::BI__builtin_ia32_gatherdiv8di: + intrinsicName = "x86.avx512.mask.gather.qpq.512"; + break; + case X86::BI__builtin_ia32_gatherdiv16si: + intrinsicName = "x86.avx512.mask.gather.qpi.512"; + break; + } + + unsigned minElts = + std::min(cast(Ops[0].getType()).getSize(), + cast(Ops[2].getType()).getSize()); + Ops[3] = getMaskVecValue(*this, Ops[3], minElts, getLoc(E->getExprLoc())); + return builder + .create( + getLoc(E->getExprLoc()), builder.getStringAttr(intrinsicName.str()), + convertType(E->getType()), Ops) + .getResult(); + } case X86::BI__builtin_ia32_scattersiv8df: case X86::BI__builtin_ia32_scattersiv16sf: case X86::BI__builtin_ia32_scatterdiv8df: diff --git a/clang/test/CIR/CodeGen/X86/avx512f-builtins.c b/clang/test/CIR/CodeGen/X86/avx512f-builtins.c index 7a89758d8140..35c8e57ac7bb 100644 --- a/clang/test/CIR/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CIR/CodeGen/X86/avx512f-builtins.c @@ -372,4 +372,138 @@ void test_mm512_mask_compressstoreu_epi32(void *__P, __mmask16 __U, __m512i __A) // LLVM-LABEL: test_mm512_mask_compressstoreu_epi32 // LLVM: @llvm.masked.compressstore.v16i32(<16 x i32> %{{.*}}, ptr %{{.*}}, <16 x i1> %{{.*}}) return _mm512_mask_compressstoreu_epi32(__P, __U, __A); -} \ No newline at end of file +} +__m512d test_mm512_i32gather_pd(__m256i __index, void const *__addr) { + // CIR-LABEL: _mm512_i32gather_pd + // CIR: cir.llvm.intrinsic "x86.avx512.mask.gather.dpd.512" + + // LLVM-LABEL: test_mm512_i32gather_pd + // LLVM: @llvm.x86.avx512.mask.gather.dpd.512 + return _mm512_i32gather_pd(__index, __addr, 2); +} + +__m512d test_mm512_mask_i32gather_pd(__m512d __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) { + // CIR-LABEL: _mm512_mask_i32gather_pd + // CIR: cir.llvm.intrinsic "x86.avx512.mask.gather.dpd.512" + + // LLVM-LABEL: test_mm512_mask_i32gather_pd + // LLVM: @llvm.x86.avx512.mask.gather.dpd.512 + return _mm512_mask_i32gather_pd(__v1_old, __mask, __index, __addr, 2); +} + +__m512 test_mm512_i32gather_ps(__m512i __index, void const *__addr) { + // CIR-LABEL: _mm512_i32gather_ps + // CIR: cir.llvm.intrinsic "x86.avx512.mask.gather.dps.512" + + // LLVM-LABEL: test_mm512_i32gather_ps + // LLVM: @llvm.x86.avx512.mask.gather.dps.512 + return _mm512_i32gather_ps(__index, __addr, 2); +} + +__m512d test_mm512_i64gather_pd(__m512i __index, void const *__addr) { + // CIR-LABEL: _mm512_i64gather_pd + // CIR: cir.llvm.intrinsic "x86.avx512.mask.gather.qpd.512" + + // LLVM-LABEL: test_mm512_i64gather_pd + // CHECK: @llvm.x86.avx512.mask.gather.qpd.512 + return _mm512_i64gather_pd(__index, __addr, 2); +} + +__m512d test_mm512_mask_i64gather_pd(__m512d __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) { + // CIR-LABEL: _mm512_mask_i64gather_pd + // CIR: cir.llvm.intrinsic "x86.avx512.mask.gather.qpd.512" + + // LLVM-LABEL: test_mm512_mask_i64gather_pd + // CHECK: @llvm.x86.avx512.mask.gather.qpd.512 + return _mm512_mask_i64gather_pd(__v1_old, __mask, __index, __addr, 2); +} + +__m256 test_mm512_i64gather_ps(__m512i __index, void const *__addr) { + // CIR-LABEL: _mm512_i64gather_ps + // CIR: cir.llvm.intrinsic "x86.avx512.mask.gather.qps.512" + + // LLVM-LABEL: test_mm512_i64gather_ps + // LLVM: @llvm.x86.avx512.mask.gather.qps.512 + return _mm512_i64gather_ps(__index, __addr, 2); +} + +__m256 test_mm512_mask_i64gather_ps(__m256 __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) { + // CIR-LABEL: _mm512_mask_i64gather_ps + // CIR: cir.llvm.intrinsic "x86.avx512.mask.gather.qps.512" + + // LLVM-LABEL: test_mm512_mask_i64gather_ps + // LLVM: @llvm.x86.avx512.mask.gather.qps.512 + return _mm512_mask_i64gather_ps(__v1_old, __mask, __index, __addr, 2); +} + +__m512i test_mm512_i32gather_epi64(__m256i __index, void const *__addr) { + // CIR-LABEL: _mm512_i32gather_epi64 + // CIR: cir.llvm.intrinsic "x86.avx512.mask.gather.dpq.512" + + // LLVM-LABEL: test_mm512_i32gather_epi64 + // LLVM: @llvm.x86.avx512.mask.gather.dpq.512 + return _mm512_i32gather_epi64(__index, __addr, 2); +} + +__m512i test_mm512_mask_i32gather_epi64(__m512i __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) { + // CIR-LABEL: _mm512_mask_i32gather_epi64 + // CIR: cir.llvm.intrinsic "x86.avx512.mask.gather.dpq.512" + + // LLVM-LABEL: test_mm512_mask_i32gather_epi64 + // LLVM: @llvm.x86.avx512.mask.gather.dpq.512 + return _mm512_mask_i32gather_epi64(__v1_old, __mask, __index, __addr, 2); +} + +__m512i test_mm512_i32gather_epi32(__m512i __index, void const *__addr) { + // CIR-LABEL: _mm512_i32gather_epi32 + // CIR: cir.llvm.intrinsic "x86.avx512.mask.gather.dpi.512" + + // LLVM-LABEL: test_mm512_i32gather_epi32 + // LLVM: @llvm.x86.avx512.mask.gather.dpi.512 + return _mm512_i32gather_epi32(__index, __addr, 2); +} + +__m512i test_mm512_mask_i32gather_epi32(__m512i __v1_old, __mmask16 __mask, __m512i __index, void const *__addr) { + // CIR-LABEL: _mm512_mask_i32gather_epi32 + // CIR: cir.llvm.intrinsic "x86.avx512.mask.gather.dpi.512" + + // LLVM-LABEL: test_mm512_mask_i32gather_epi32 + // LLVM: @llvm.x86.avx512.mask.gather.dpi.512 + return _mm512_mask_i32gather_epi32(__v1_old, __mask, __index, __addr, 2); +} + +__m512i test_mm512_i64gather_epi64(__m512i __index, void const *__addr) { + // CIR-LABEL: _mm512_i64gather_epi64 + // CIR: cir.llvm.intrinsic "x86.avx512.mask.gather.qpq.512" + + // LLVM-LABEL: test_mm512_i64gather_epi64 + // LLVM: @llvm.x86.avx512.mask.gather.qpq.512 + return _mm512_i64gather_epi64(__index, __addr, 2); +} + +__m512i test_mm512_mask_i64gather_epi64(__m512i __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) { + // CIR-LABEL: _mm512_mask_i64gather_epi64 + // CIR: cir.llvm.intrinsic "x86.avx512.mask.gather.qpq.512" + + // LLVM-LABEL: test_mm512_mask_i64gather_epi64 + // LLVM: @llvm.x86.avx512.mask.gather.qpq.512 + return _mm512_mask_i64gather_epi64(__v1_old, __mask, __index, __addr, 2); +} + +__m256i test_mm512_i64gather_epi32(__m512i __index, void const *__addr) { + // CIR-LABEL: _mm512_i64gather_epi32 + // CIR: cir.llvm.intrinsic "x86.avx512.mask.gather.qpi.512" + + // LLVM-LABEL: test_mm512_i64gather_epi32 + // LLVM: @llvm.x86.avx512.mask.gather.qpi.512 + return _mm512_i64gather_epi32(__index, __addr, 2); +} + +__m256i test_mm512_mask_i64gather_epi32(__m256i __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) { + // CIR-LABEL: _mm512_mask_i64gather_epi32 + // CIR: cir.llvm.intrinsic "x86.avx512.mask.gather.qpi.512" + + // LLVM-LABEL: test_mm512_mask_i64gather_epi32 + // LLVM: @llvm.x86.avx512.mask.gather.qpi.512 + return _mm512_mask_i64gather_epi32(__v1_old, __mask, __index, __addr, 2); +} diff --git a/clang/test/CIR/CodeGen/X86/avx512vl-builtins.c b/clang/test/CIR/CodeGen/X86/avx512vl-builtins.c index 8e4027e2503f..b729606d2d6b 100644 --- a/clang/test/CIR/CodeGen/X86/avx512vl-builtins.c +++ b/clang/test/CIR/CodeGen/X86/avx512vl-builtins.c @@ -597,4 +597,147 @@ void test_mm256_mask_compressstoreu_epi32(void *__P, __mmask8 __U, __m256i __A) // LLVM-LABEL: @test_mm256_mask_compressstoreu_epi32 // LLVM: @llvm.masked.compressstore.v8i32(<8 x i32> %{{.*}}, ptr %{{.*}}, <8 x i1> %{{.*}}) return _mm256_mask_compressstoreu_epi32(__P,__U,__A); -} \ No newline at end of file +} +__m128d test_mm_mmask_i64gather_pd(__m128d __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) { + // CIR-LABEL: test_mm_mmask_i64gather_pd + // CIR: cir.llvm.intrinsic "x86.avx512.mask.gather3div2.df" + + // LLVM-LABEL: @test_mm_mmask_i64gather_pd + // LLVM: @llvm.x86.avx512.mask.gather3div2.df + return _mm_mmask_i64gather_pd(__v1_old, __mask, __index, __addr, 2); +} + +__m128i test_mm_mmask_i64gather_epi64(__m128i __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) { + // CIR-LABEL: test_mm_mmask_i64gather_epi64 + // CIR: cir.llvm.intrinsic "x86.avx512.mask.gather3div2.di" + + // LLVM-LABEL: @test_mm_mmask_i64gather_epi64 + // LLVM: @llvm.x86.avx512.mask.gather3div2.di + return _mm_mmask_i64gather_epi64(__v1_old, __mask, __index, __addr, 2); +} + +__m256d test_mm256_mmask_i64gather_pd(__m256d __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) { + // CIR-LABEL: test_mm256_mmask_i64gather_pd + // CIR: cir.llvm.intrinsic "x86.avx512.mask.gather3div4.df" + + // LLVM-LABEL: @test_mm256_mmask_i64gather_pd + // LLVM: @llvm.x86.avx512.mask.gather3div4.df + return _mm256_mmask_i64gather_pd(__v1_old, __mask, __index, __addr, 2); +} + +__m256i test_mm256_mmask_i64gather_epi64(__m256i __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) { + // CIR-LABEL: test_mm256_mmask_i64gather_epi64 + // CIR: cir.llvm.intrinsic "x86.avx512.mask.gather3div4.di" + + // LLVM-LABEL: @test_mm256_mmask_i64gather_epi64 + // LLVM: @llvm.x86.avx512.mask.gather3div4.di + return _mm256_mmask_i64gather_epi64(__v1_old, __mask, __index, __addr, 2); +} + +__m128 test_mm_mmask_i64gather_ps(__m128 __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) { + // CIR-LABEL: test_mm_mmask_i64gather_ps + // CIR: cir.llvm.intrinsic "x86.avx512.mask.gather3div4.sf" + + // LLVM-LABEL: @test_mm_mmask_i64gather_ps + // LLVM: @llvm.x86.avx512.mask.gather3div4.sf + return _mm_mmask_i64gather_ps(__v1_old, __mask, __index, __addr, 2); +} + +__m128i test_mm_mmask_i64gather_epi32(__m128i __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) { + // CIR-LABEL: test_mm_mmask_i64gather_epi32 + // CIR: cir.llvm.intrinsic "x86.avx512.mask.gather3div4.si" + + // LLVM-LABEL: @test_mm_mmask_i64gather_epi32 + // LLVM: @llvm.x86.avx512.mask.gather3div4.si + return _mm_mmask_i64gather_epi32(__v1_old, __mask, __index, __addr, 2); +} + +__m128 test_mm256_mmask_i64gather_ps(__m128 __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) { + // CIR-LABEL: test_mm256_mmask_i64gather_ps + // CIR: cir.llvm.intrinsic "x86.avx512.mask.gather3div8.sf" + + // LLVM-LABEL: @test_mm256_mmask_i64gather_ps + // LLVM: @llvm.x86.avx512.mask.gather3div8.sf + return _mm256_mmask_i64gather_ps(__v1_old, __mask, __index, __addr, 2); +} + +__m128i test_mm256_mmask_i64gather_epi32(__m128i __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) { + // CIR-LABEL: test_mm256_mmask_i64gather_epi32 + // CIR: cir.llvm.intrinsic "x86.avx512.mask.gather3div8.si" + + // LLVM-LABEL: @test_mm256_mmask_i64gather_epi32 + // LLVM: @llvm.x86.avx512.mask.gather3div8.si + return _mm256_mmask_i64gather_epi32(__v1_old, __mask, __index, __addr, 2); +} + +__m128d test_mm_mask_i32gather_pd(__m128d __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) { + // CIR-LABEL: test_mm_mask_i32gather_pd + // CIR: cir.llvm.intrinsic "x86.avx512.mask.gather3siv2.df" + + // LLVM-LABEL: @test_mm_mask_i32gather_pd + // LLVM: @llvm.x86.avx512.mask.gather3siv2.df + return _mm_mmask_i32gather_pd(__v1_old, __mask, __index, __addr, 2); +} + +__m128i test_mm_mask_i32gather_epi64(__m128i __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) { + // CIR-LABEL: test_mm_mask_i32gather_epi64 + // CIR: cir.llvm.intrinsic "x86.avx512.mask.gather3siv2.di" + + // LLVM-LABEL: @test_mm_mask_i32gather_epi64 + // LLVM: @llvm.x86.avx512.mask.gather3siv2.di + return _mm_mmask_i32gather_epi64(__v1_old, __mask, __index, __addr, 2); +} + +__m256d test_mm256_mask_i32gather_pd(__m256d __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) { + // CIR-LABEL: test_mm256_mask_i32gather_pd + // CIR: cir.llvm.intrinsic "x86.avx512.mask.gather3siv4.df" + + // LLVM-LABEL: @test_mm256_mask_i32gather_pd + // LLVM: @llvm.x86.avx512.mask.gather3siv4.df + return _mm256_mmask_i32gather_pd(__v1_old, __mask, __index, __addr, 2); +} + +__m256i test_mm256_mask_i32gather_epi64(__m256i __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) { + // CIR-LABEL: test_mm256_mask_i32gather_epi64 + // CIR: cir.llvm.intrinsic "x86.avx512.mask.gather3siv4.di" + + // LLVM-LABEL: @test_mm256_mask_i32gather_epi64 + // LLVM: @llvm.x86.avx512.mask.gather3siv4.di + return _mm256_mmask_i32gather_epi64(__v1_old, __mask, __index, __addr, 2); +} + +__m128 test_mm_mask_i32gather_ps(__m128 __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) { + // CIR-LABEL: test_mm_mask_i32gather_ps + // CIR: cir.llvm.intrinsic "x86.avx512.mask.gather3siv4.sf" + + // LLVM-LABEL: @test_mm_mask_i32gather_ps + // LLVM: @llvm.x86.avx512.mask.gather3siv4.sf + return _mm_mmask_i32gather_ps(__v1_old, __mask, __index, __addr, 2); +} + +__m128i test_mm_mask_i32gather_epi32(__m128i __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) { + // CIR-LABEL: test_mm_mask_i32gather_epi32 + // CIR: cir.llvm.intrinsic "x86.avx512.mask.gather3siv4.si" + + // LLVM-LABEL: @test_mm_mask_i32gather_epi32 + // LLVM: @llvm.x86.avx512.mask.gather3siv4.si + return _mm_mmask_i32gather_epi32(__v1_old, __mask, __index, __addr, 2); +} + +__m256 test_mm256_mask_i32gather_ps(__m256 __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) { + // CIR-LABEL: test_mm256_mask_i32gather_ps + // CIR: cir.llvm.intrinsic "x86.avx512.mask.gather3siv8.sf" + + // LLVM-LABEL: @test_mm256_mask_i32gather_ps + // LLVM: @llvm.x86.avx512.mask.gather3siv8.sf + return _mm256_mmask_i32gather_ps(__v1_old, __mask, __index, __addr, 2); +} + +__m256i test_mm256_mask_i32gather_epi32(__m256i __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) { + // CIR-LABEL: test_mm256_mask_i32gather_epi32 + // CIR: cir.llvm.intrinsic "x86.avx512.mask.gather3siv8.si" + + // LLVM-LABEL: @test_mm256_mask_i32gather_epi32 + // LLVM: @llvm.x86.avx512.mask.gather3siv8.si + return _mm256_mmask_i32gather_epi32(__v1_old, __mask, __index, __addr, 2); +}