Commit 01671ff

[X86] collectConcatOps - handle extract_subvector(concat_subvectors(...)) patterns (#143406)
1 parent 59ef2c3 commit 01671ff

File tree: 3 files changed (+64, -54 lines)


llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 19 additions & 0 deletions

@@ -4311,6 +4311,25 @@ static bool collectConcatOps(SDNode *N, SmallVectorImpl<SDValue> &Ops,
     }
   }
 
+  if (N->getOpcode() == ISD::EXTRACT_SUBVECTOR) {
+    EVT VT = N->getValueType(0);
+    SDValue Src = N->getOperand(0);
+    uint64_t Idx = N->getConstantOperandVal(1);
+
+    // Collect all the subvectors from the source vector and slice off the
+    // extraction.
+    SmallVector<SDValue, 4> SrcOps;
+    if (collectConcatOps(Src.getNode(), SrcOps, DAG) &&
+        VT.getSizeInBits() > SrcOps[0].getValueSizeInBits() &&
+        (VT.getSizeInBits() % SrcOps[0].getValueSizeInBits()) == 0 &&
+        (Idx % SrcOps[0].getValueType().getVectorNumElements()) == 0) {
+      unsigned SubIdx = Idx / SrcOps[0].getValueType().getVectorNumElements();
+      unsigned NumSubs = VT.getSizeInBits() / SrcOps[0].getValueSizeInBits();
+      Ops.append(SrcOps.begin() + SubIdx, SrcOps.begin() + SubIdx + NumSubs);
+      return true;
+    }
+  }
+
   return false;
 }
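
The new EXTRACT_SUBVECTOR handling above is pure index arithmetic: when the source is already a concatenation of equally sized subvectors, an extraction that starts on a subvector boundary and spans a whole number of those subvectors is itself just a slice of that subvector list. Below is a minimal standalone sketch of the same arithmetic over plain C++ containers rather than SDValues; the sliceConcat helper, its parameters, and the example values are illustrative only and not part of the patch.

#include <cassert>
#include <cstdint>
#include <iostream>
#include <optional>
#include <vector>

// Toy stand-in for one subvector of a concatenated source value.
using Subvector = std::vector<int>;

// Model of the new EXTRACT_SUBVECTOR case: given the subvectors that make up
// the source ("SrcOps"), an extraction of NumExtractElts elements starting at
// element Idx decomposes into whole subvectors when
//  * the extraction is wider than one subvector,
//  * its width is a multiple of the subvector width, and
//  * Idx is aligned to a subvector boundary.
std::optional<std::vector<Subvector>>
sliceConcat(const std::vector<Subvector> &SrcOps, uint64_t Idx,
            uint64_t NumExtractElts) {
  const uint64_t SubElts = SrcOps[0].size();
  if (NumExtractElts <= SubElts || NumExtractElts % SubElts != 0 ||
      Idx % SubElts != 0)
    return std::nullopt; // give up, as collectConcatOps does by returning false

  const uint64_t SubIdx = Idx / SubElts;             // first subvector covered
  const uint64_t NumSubs = NumExtractElts / SubElts; // how many it spans
  assert(SubIdx + NumSubs <= SrcOps.size() && "extraction out of range");

  // Equivalent of Ops.append(SrcOps.begin() + SubIdx, ... + SubIdx + NumSubs).
  return std::vector<Subvector>(SrcOps.begin() + SubIdx,
                                SrcOps.begin() + SubIdx + NumSubs);
}

int main() {
  // Source: a concatenation of four 4-element subvectors (think 4 x v4i8).
  std::vector<Subvector> SrcOps = {
      {0, 1, 2, 3}, {4, 5, 6, 7}, {8, 9, 10, 11}, {12, 13, 14, 15}};

  // Extracting 8 elements at element index 8 covers subvectors 2 and 3.
  auto Ops = sliceConcat(SrcOps, /*Idx=*/8, /*NumExtractElts=*/8);
  assert(Ops && Ops->size() == 2 && (*Ops)[0][0] == 8 && (*Ops)[1][0] == 12);
  std::cout << "extraction maps to subvectors 2 and 3\n";

  // A misaligned extraction (Idx = 6) does not decompose and is rejected.
  assert(!sliceConcat(SrcOps, /*Idx=*/6, /*NumExtractElts=*/8));
  return 0;
}

The patch itself performs the equivalent comparisons in bits (getSizeInBits against getValueSizeInBits) and checks the element index against getVectorNumElements of the first subvector; with uniformly typed elements those checks reduce to the element-count arithmetic modelled here.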

llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-3.ll

Lines changed: 40 additions & 48 deletions

@@ -740,16 +740,15 @@ define void @store_i8_stride3_vf16(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
 ; AVX512-NEXT: vpalignr {{.*#+}} xmm0 = xmm3[5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4]
 ; AVX512-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[5,6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4]
 ; AVX512-NEXT: vpalignr {{.*#+}} xmm1 = xmm4[5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4]
+; AVX512-NEXT: vmovdqa {{.*#+}} xmm3 = [0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5]
+; AVX512-NEXT: vpshufb %xmm3, %xmm1, %xmm1
 ; AVX512-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4]
+; AVX512-NEXT: vpshufb %xmm3, %xmm0, %xmm0
 ; AVX512-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[5,6,7,8,9,10,11,12,13,14,15],xmm4[0,1,2,3,4]
-; AVX512-NEXT: vbroadcasti128 {{.*#+}} ymm3 = [0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5,0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5]
-; AVX512-NEXT: # ymm3 = mem[0,1,0,1]
 ; AVX512-NEXT: vpshufb %xmm3, %xmm2, %xmm2
+; AVX512-NEXT: vmovdqa %xmm0, 16(%rcx)
+; AVX512-NEXT: vmovdqa %xmm1, (%rcx)
 ; AVX512-NEXT: vmovdqa %xmm2, 32(%rcx)
-; AVX512-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
-; AVX512-NEXT: vpshufb %ymm3, %ymm0, %ymm0
-; AVX512-NEXT: vmovdqa %ymm0, (%rcx)
-; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
 ;
 ; AVX512-FCP-LABEL: store_i8_stride3_vf16:
@@ -763,16 +762,15 @@ define void @store_i8_stride3_vf16(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
 ; AVX512-FCP-NEXT: vpalignr {{.*#+}} xmm0 = xmm3[5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4]
 ; AVX512-FCP-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[5,6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4]
 ; AVX512-FCP-NEXT: vpalignr {{.*#+}} xmm1 = xmm4[5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4]
+; AVX512-FCP-NEXT: vmovdqa {{.*#+}} xmm3 = [0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5]
+; AVX512-FCP-NEXT: vpshufb %xmm3, %xmm1, %xmm1
 ; AVX512-FCP-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4]
+; AVX512-FCP-NEXT: vpshufb %xmm3, %xmm0, %xmm0
 ; AVX512-FCP-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[5,6,7,8,9,10,11,12,13,14,15],xmm4[0,1,2,3,4]
-; AVX512-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm3 = [0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5,0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5]
-; AVX512-FCP-NEXT: # ymm3 = mem[0,1,0,1]
 ; AVX512-FCP-NEXT: vpshufb %xmm3, %xmm2, %xmm2
+; AVX512-FCP-NEXT: vmovdqa %xmm0, 16(%rcx)
+; AVX512-FCP-NEXT: vmovdqa %xmm1, (%rcx)
 ; AVX512-FCP-NEXT: vmovdqa %xmm2, 32(%rcx)
-; AVX512-FCP-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
-; AVX512-FCP-NEXT: vpshufb %ymm3, %ymm0, %ymm0
-; AVX512-FCP-NEXT: vmovdqa %ymm0, (%rcx)
-; AVX512-FCP-NEXT: vzeroupper
 ; AVX512-FCP-NEXT: retq
 ;
 ; AVX512DQ-LABEL: store_i8_stride3_vf16:
@@ -786,16 +784,15 @@ define void @store_i8_stride3_vf16(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
 ; AVX512DQ-NEXT: vpalignr {{.*#+}} xmm0 = xmm3[5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4]
 ; AVX512DQ-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[5,6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4]
 ; AVX512DQ-NEXT: vpalignr {{.*#+}} xmm1 = xmm4[5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4]
+; AVX512DQ-NEXT: vmovdqa {{.*#+}} xmm3 = [0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5]
+; AVX512DQ-NEXT: vpshufb %xmm3, %xmm1, %xmm1
 ; AVX512DQ-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4]
+; AVX512DQ-NEXT: vpshufb %xmm3, %xmm0, %xmm0
 ; AVX512DQ-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[5,6,7,8,9,10,11,12,13,14,15],xmm4[0,1,2,3,4]
-; AVX512DQ-NEXT: vbroadcasti128 {{.*#+}} ymm3 = [0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5,0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5]
-; AVX512DQ-NEXT: # ymm3 = mem[0,1,0,1]
 ; AVX512DQ-NEXT: vpshufb %xmm3, %xmm2, %xmm2
+; AVX512DQ-NEXT: vmovdqa %xmm0, 16(%rcx)
+; AVX512DQ-NEXT: vmovdqa %xmm1, (%rcx)
 ; AVX512DQ-NEXT: vmovdqa %xmm2, 32(%rcx)
-; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
-; AVX512DQ-NEXT: vpshufb %ymm3, %ymm0, %ymm0
-; AVX512DQ-NEXT: vmovdqa %ymm0, (%rcx)
-; AVX512DQ-NEXT: vzeroupper
 ; AVX512DQ-NEXT: retq
 ;
 ; AVX512DQ-FCP-LABEL: store_i8_stride3_vf16:
@@ -809,16 +806,15 @@ define void @store_i8_stride3_vf16(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
 ; AVX512DQ-FCP-NEXT: vpalignr {{.*#+}} xmm0 = xmm3[5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4]
 ; AVX512DQ-FCP-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[5,6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4]
 ; AVX512DQ-FCP-NEXT: vpalignr {{.*#+}} xmm1 = xmm4[5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4]
+; AVX512DQ-FCP-NEXT: vmovdqa {{.*#+}} xmm3 = [0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5]
+; AVX512DQ-FCP-NEXT: vpshufb %xmm3, %xmm1, %xmm1
 ; AVX512DQ-FCP-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4]
+; AVX512DQ-FCP-NEXT: vpshufb %xmm3, %xmm0, %xmm0
 ; AVX512DQ-FCP-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[5,6,7,8,9,10,11,12,13,14,15],xmm4[0,1,2,3,4]
-; AVX512DQ-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm3 = [0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5,0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5]
-; AVX512DQ-FCP-NEXT: # ymm3 = mem[0,1,0,1]
 ; AVX512DQ-FCP-NEXT: vpshufb %xmm3, %xmm2, %xmm2
+; AVX512DQ-FCP-NEXT: vmovdqa %xmm0, 16(%rcx)
+; AVX512DQ-FCP-NEXT: vmovdqa %xmm1, (%rcx)
 ; AVX512DQ-FCP-NEXT: vmovdqa %xmm2, 32(%rcx)
-; AVX512DQ-FCP-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
-; AVX512DQ-FCP-NEXT: vpshufb %ymm3, %ymm0, %ymm0
-; AVX512DQ-FCP-NEXT: vmovdqa %ymm0, (%rcx)
-; AVX512DQ-FCP-NEXT: vzeroupper
 ; AVX512DQ-FCP-NEXT: retq
 ;
 ; AVX512BW-LABEL: store_i8_stride3_vf16:
@@ -832,16 +828,15 @@ define void @store_i8_stride3_vf16(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
 ; AVX512BW-NEXT: vpalignr {{.*#+}} xmm0 = xmm3[5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4]
 ; AVX512BW-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[5,6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4]
 ; AVX512BW-NEXT: vpalignr {{.*#+}} xmm1 = xmm4[5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4]
+; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5]
+; AVX512BW-NEXT: vpshufb %xmm3, %xmm1, %xmm1
 ; AVX512BW-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4]
+; AVX512BW-NEXT: vpshufb %xmm3, %xmm0, %xmm0
 ; AVX512BW-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[5,6,7,8,9,10,11,12,13,14,15],xmm4[0,1,2,3,4]
-; AVX512BW-NEXT: vbroadcasti128 {{.*#+}} ymm3 = [0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5,0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5]
-; AVX512BW-NEXT: # ymm3 = mem[0,1,0,1]
 ; AVX512BW-NEXT: vpshufb %xmm3, %xmm2, %xmm2
+; AVX512BW-NEXT: vmovdqa %xmm0, 16(%rcx)
+; AVX512BW-NEXT: vmovdqa %xmm1, (%rcx)
 ; AVX512BW-NEXT: vmovdqa %xmm2, 32(%rcx)
-; AVX512BW-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
-; AVX512BW-NEXT: vpshufb %ymm3, %ymm0, %ymm0
-; AVX512BW-NEXT: vmovdqa %ymm0, (%rcx)
-; AVX512BW-NEXT: vzeroupper
 ; AVX512BW-NEXT: retq
 ;
 ; AVX512BW-FCP-LABEL: store_i8_stride3_vf16:
@@ -855,16 +850,15 @@ define void @store_i8_stride3_vf16(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
 ; AVX512BW-FCP-NEXT: vpalignr {{.*#+}} xmm0 = xmm3[5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4]
 ; AVX512BW-FCP-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[5,6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4]
 ; AVX512BW-FCP-NEXT: vpalignr {{.*#+}} xmm1 = xmm4[5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4]
+; AVX512BW-FCP-NEXT: vmovdqa {{.*#+}} xmm3 = [0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5]
+; AVX512BW-FCP-NEXT: vpshufb %xmm3, %xmm1, %xmm1
 ; AVX512BW-FCP-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4]
+; AVX512BW-FCP-NEXT: vpshufb %xmm3, %xmm0, %xmm0
 ; AVX512BW-FCP-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[5,6,7,8,9,10,11,12,13,14,15],xmm4[0,1,2,3,4]
-; AVX512BW-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm3 = [0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5,0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5]
-; AVX512BW-FCP-NEXT: # ymm3 = mem[0,1,0,1]
 ; AVX512BW-FCP-NEXT: vpshufb %xmm3, %xmm2, %xmm2
+; AVX512BW-FCP-NEXT: vmovdqa %xmm0, 16(%rcx)
+; AVX512BW-FCP-NEXT: vmovdqa %xmm1, (%rcx)
 ; AVX512BW-FCP-NEXT: vmovdqa %xmm2, 32(%rcx)
-; AVX512BW-FCP-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
-; AVX512BW-FCP-NEXT: vpshufb %ymm3, %ymm0, %ymm0
-; AVX512BW-FCP-NEXT: vmovdqa %ymm0, (%rcx)
-; AVX512BW-FCP-NEXT: vzeroupper
 ; AVX512BW-FCP-NEXT: retq
 ;
 ; AVX512DQ-BW-LABEL: store_i8_stride3_vf16:
@@ -878,16 +872,15 @@ define void @store_i8_stride3_vf16(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
 ; AVX512DQ-BW-NEXT: vpalignr {{.*#+}} xmm0 = xmm3[5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4]
 ; AVX512DQ-BW-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[5,6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4]
 ; AVX512DQ-BW-NEXT: vpalignr {{.*#+}} xmm1 = xmm4[5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4]
+; AVX512DQ-BW-NEXT: vmovdqa {{.*#+}} xmm3 = [0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5]
+; AVX512DQ-BW-NEXT: vpshufb %xmm3, %xmm1, %xmm1
 ; AVX512DQ-BW-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4]
+; AVX512DQ-BW-NEXT: vpshufb %xmm3, %xmm0, %xmm0
 ; AVX512DQ-BW-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[5,6,7,8,9,10,11,12,13,14,15],xmm4[0,1,2,3,4]
-; AVX512DQ-BW-NEXT: vbroadcasti128 {{.*#+}} ymm3 = [0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5,0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5]
-; AVX512DQ-BW-NEXT: # ymm3 = mem[0,1,0,1]
 ; AVX512DQ-BW-NEXT: vpshufb %xmm3, %xmm2, %xmm2
+; AVX512DQ-BW-NEXT: vmovdqa %xmm0, 16(%rcx)
+; AVX512DQ-BW-NEXT: vmovdqa %xmm1, (%rcx)
 ; AVX512DQ-BW-NEXT: vmovdqa %xmm2, 32(%rcx)
-; AVX512DQ-BW-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
-; AVX512DQ-BW-NEXT: vpshufb %ymm3, %ymm0, %ymm0
-; AVX512DQ-BW-NEXT: vmovdqa %ymm0, (%rcx)
-; AVX512DQ-BW-NEXT: vzeroupper
 ; AVX512DQ-BW-NEXT: retq
 ;
 ; AVX512DQ-BW-FCP-LABEL: store_i8_stride3_vf16:
@@ -901,16 +894,15 @@ define void @store_i8_stride3_vf16(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
 ; AVX512DQ-BW-FCP-NEXT: vpalignr {{.*#+}} xmm0 = xmm3[5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4]
 ; AVX512DQ-BW-FCP-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[5,6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4]
 ; AVX512DQ-BW-FCP-NEXT: vpalignr {{.*#+}} xmm1 = xmm4[5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4]
+; AVX512DQ-BW-FCP-NEXT: vmovdqa {{.*#+}} xmm3 = [0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5]
+; AVX512DQ-BW-FCP-NEXT: vpshufb %xmm3, %xmm1, %xmm1
 ; AVX512DQ-BW-FCP-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4]
+; AVX512DQ-BW-FCP-NEXT: vpshufb %xmm3, %xmm0, %xmm0
 ; AVX512DQ-BW-FCP-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[5,6,7,8,9,10,11,12,13,14,15],xmm4[0,1,2,3,4]
-; AVX512DQ-BW-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm3 = [0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5,0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5]
-; AVX512DQ-BW-FCP-NEXT: # ymm3 = mem[0,1,0,1]
 ; AVX512DQ-BW-FCP-NEXT: vpshufb %xmm3, %xmm2, %xmm2
+; AVX512DQ-BW-FCP-NEXT: vmovdqa %xmm0, 16(%rcx)
+; AVX512DQ-BW-FCP-NEXT: vmovdqa %xmm1, (%rcx)
 ; AVX512DQ-BW-FCP-NEXT: vmovdqa %xmm2, 32(%rcx)
-; AVX512DQ-BW-FCP-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
-; AVX512DQ-BW-FCP-NEXT: vpshufb %ymm3, %ymm0, %ymm0
-; AVX512DQ-BW-FCP-NEXT: vmovdqa %ymm0, (%rcx)
-; AVX512DQ-BW-FCP-NEXT: vzeroupper
 ; AVX512DQ-BW-FCP-NEXT: retq
 %in.vec0 = load <16 x i8>, ptr %in.vecptr0, align 64
 %in.vec1 = load <16 x i8>, ptr %in.vecptr1, align 64

llvm/test/CodeGen/X86/x86-interleaved-access.ll

Lines changed: 5 additions & 6 deletions

@@ -962,16 +962,15 @@ define void @interleaved_store_vf16_i8_stride3(<16 x i8> %a, <16 x i8> %b, <16 x
 ; AVX512-NEXT: vpalignr {{.*#+}} xmm0 = xmm3[5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4]
 ; AVX512-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[5,6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4]
 ; AVX512-NEXT: vpalignr {{.*#+}} xmm1 = xmm4[5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4]
+; AVX512-NEXT: vmovdqa {{.*#+}} xmm3 = [0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5]
+; AVX512-NEXT: vpshufb %xmm3, %xmm1, %xmm1
 ; AVX512-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4]
+; AVX512-NEXT: vpshufb %xmm3, %xmm0, %xmm0
 ; AVX512-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[5,6,7,8,9,10,11,12,13,14,15],xmm4[0,1,2,3,4]
-; AVX512-NEXT: vbroadcasti128 {{.*#+}} ymm3 = [0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5,0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5]
-; AVX512-NEXT: # ymm3 = mem[0,1,0,1]
 ; AVX512-NEXT: vpshufb %xmm3, %xmm2, %xmm2
+; AVX512-NEXT: vmovdqu %xmm0, 16(%rdi)
+; AVX512-NEXT: vmovdqu %xmm1, (%rdi)
 ; AVX512-NEXT: vmovdqu %xmm2, 32(%rdi)
-; AVX512-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
-; AVX512-NEXT: vpshufb %ymm3, %ymm0, %ymm0
-; AVX512-NEXT: vmovdqu %ymm0, (%rdi)
-; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
 %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
 %2 = shufflevector <16 x i8> %c, <16 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
