Skip to content

Commit 048ab9e

Browse files
committed
[NVPTX] add coverage for v2f32 in ldg-invariant and fp-contract
For fp-contract:
- test folding of fma.f32x2
- bump SM version to 100

For ldg-invariant:
- test proper splitting of loads on vectors of f32
1 parent b0b64af commit 048ab9e

File tree

1 file changed

+70
-0
lines changed

1 file changed

+70
-0
lines changed

llvm/test/CodeGen/NVPTX/ldg-invariant.ll

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,76 @@ define half @ld_global_v8f16(ptr addrspace(1) %ptr) {
127127
ret half %sum
128128
}
129129

130+
; Loads a <2 x float> from an addrspace(1) pointer with !invariant.load
; metadata, then sums both lanes and returns the result.
; The expected PTX contains a single ld.global.nc.v2.f32 covering both lanes.
define float @ld_global_v2f32(ptr addrspace(1) %ptr) {
131+
; CHECK-LABEL: ld_global_v2f32(
132+
; CHECK: {
133+
; CHECK-NEXT: .reg .f32 %f<4>;
134+
; CHECK-NEXT: .reg .b64 %rd<2>;
135+
; CHECK-EMPTY:
136+
; CHECK-NEXT: // %bb.0:
137+
; CHECK-NEXT: ld.param.u64 %rd1, [ld_global_v2f32_param_0];
138+
; CHECK-NEXT: ld.global.nc.v2.f32 {%f1, %f2}, [%rd1];
139+
; CHECK-NEXT: add.rn.f32 %f3, %f1, %f2;
140+
; CHECK-NEXT: st.param.f32 [func_retval0], %f3;
141+
; CHECK-NEXT: ret;
; The whole vector is loaded at once; the lanes are only separated afterwards.
142+
%a = load <2 x float>, ptr addrspace(1) %ptr, !invariant.load !0
143+
%v1 = extractelement <2 x float> %a, i32 0
144+
%v2 = extractelement <2 x float> %a, i32 1
; Both lanes feed the returned sum, so each loaded element is used.
145+
%sum = fadd float %v1, %v2
146+
ret float %sum
147+
}
148+
149+
; Loads a <4 x float> from an addrspace(1) pointer with !invariant.load
; metadata, then returns (lane0 + lane1) + (lane2 + lane3).
; The expected PTX contains a single ld.global.nc.v4.f32 covering all lanes.
define float @ld_global_v4f32(ptr addrspace(1) %ptr) {
150+
; CHECK-LABEL: ld_global_v4f32(
151+
; CHECK: {
152+
; CHECK-NEXT: .reg .f32 %f<8>;
153+
; CHECK-NEXT: .reg .b64 %rd<2>;
154+
; CHECK-EMPTY:
155+
; CHECK-NEXT: // %bb.0:
156+
; CHECK-NEXT: ld.param.u64 %rd1, [ld_global_v4f32_param_0];
157+
; CHECK-NEXT: ld.global.nc.v4.f32 {%f1, %f2, %f3, %f4}, [%rd1];
158+
; CHECK-NEXT: add.rn.f32 %f5, %f1, %f2;
159+
; CHECK-NEXT: add.rn.f32 %f6, %f3, %f4;
160+
; CHECK-NEXT: add.rn.f32 %f7, %f5, %f6;
161+
; CHECK-NEXT: st.param.f32 [func_retval0], %f7;
162+
; CHECK-NEXT: ret;
; The whole vector is loaded at once; the lanes are only separated afterwards.
163+
%a = load <4 x float>, ptr addrspace(1) %ptr, !invariant.load !0
164+
%v1 = extractelement <4 x float> %a, i32 0
165+
%v2 = extractelement <4 x float> %a, i32 1
166+
%v3 = extractelement <4 x float> %a, i32 2
167+
%v4 = extractelement <4 x float> %a, i32 3
; Pairwise sums first, then the final sum; all four lanes reach the result.
168+
%sum1 = fadd float %v1, %v2
169+
%sum2 = fadd float %v3, %v4
170+
%sum = fadd float %sum1, %sum2
171+
ret float %sum
172+
}
173+
174+
; Loads a <8 x float> from an addrspace(1) pointer with !invariant.load
; metadata, then returns (lane0 + lane2) + (lane4 + lane6).
; The expected PTX shows the load split into two ld.global.nc.v4.f32
; instructions, one at [%rd1+16] and one at [%rd1].
define float @ld_global_v8f32(ptr addrspace(1) %ptr) {
175+
; CHECK-LABEL: ld_global_v8f32(
176+
; CHECK: {
177+
; CHECK-NEXT: .reg .f32 %f<12>;
178+
; CHECK-NEXT: .reg .b64 %rd<2>;
179+
; CHECK-EMPTY:
180+
; CHECK-NEXT: // %bb.0:
181+
; CHECK-NEXT: ld.param.u64 %rd1, [ld_global_v8f32_param_0];
182+
; CHECK-NEXT: ld.global.nc.v4.f32 {%f1, %f2, %f3, %f4}, [%rd1+16];
183+
; CHECK-NEXT: ld.global.nc.v4.f32 {%f5, %f6, %f7, %f8}, [%rd1];
184+
; CHECK-NEXT: add.rn.f32 %f9, %f5, %f7;
185+
; CHECK-NEXT: add.rn.f32 %f10, %f1, %f3;
186+
; CHECK-NEXT: add.rn.f32 %f11, %f9, %f10;
187+
; CHECK-NEXT: st.param.f32 [func_retval0], %f11;
188+
; CHECK-NEXT: ret;
; The IR performs one 8-element load; the two-way split appears only in the
; expected PTX.
189+
%a = load <8 x float>, ptr addrspace(1) %ptr, !invariant.load !0
; Only the even-indexed lanes are extracted: two from the lower four elements
; (0, 2) and two from the upper four (4, 6).
190+
%v1 = extractelement <8 x float> %a, i32 0
191+
%v2 = extractelement <8 x float> %a, i32 2
192+
%v3 = extractelement <8 x float> %a, i32 4
193+
%v4 = extractelement <8 x float> %a, i32 6
194+
%sum1 = fadd float %v1, %v2
195+
%sum2 = fadd float %v3, %v4
196+
%sum = fadd float %sum1, %sum2
197+
ret float %sum
198+
}
199+
130200
define i8 @ld_global_v8i8(ptr addrspace(1) %ptr) {
131201
; CHECK-LABEL: ld_global_v8i8(
132202
; CHECK: {

0 commit comments

Comments
 (0)