@@ -127,6 +127,76 @@ define half @ld_global_v8f16(ptr addrspace(1) %ptr) {
127
127
ret half %sum
128
128
}
129
129
130
; Invariant <2 x float> load through an addrspace(1) pointer; both lanes are
; extracted and added together. The expected PTX below is one
; ld.global.nc.v2.f32 feeding a single add.rn.f32.
define float @ld_global_v2f32(ptr addrspace(1) %ptr) {
; CHECK-LABEL: ld_global_v2f32(
; CHECK: {
; CHECK-NEXT: .reg .f32 %f<4>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [ld_global_v2f32_param_0];
; CHECK-NEXT: ld.global.nc.v2.f32 {%f1, %f2}, [%rd1];
; CHECK-NEXT: add.rn.f32 %f3, %f1, %f2;
; CHECK-NEXT: st.param.f32 [func_retval0], %f3;
; CHECK-NEXT: ret;
  %vec = load <2 x float>, ptr addrspace(1) %ptr, !invariant.load !0
  %lane0 = extractelement <2 x float> %vec, i32 0
  %lane1 = extractelement <2 x float> %vec, i32 1
  %total = fadd float %lane0, %lane1
  ret float %total
}
148
+
149
; Invariant <4 x float> load through an addrspace(1) pointer; all four lanes
; are extracted and reduced pairwise: (lane0 + lane1) + (lane2 + lane3).
; The expected PTX below is one ld.global.nc.v4.f32 followed by three
; add.rn.f32 instructions.
define float @ld_global_v4f32(ptr addrspace(1) %ptr) {
; CHECK-LABEL: ld_global_v4f32(
; CHECK: {
; CHECK-NEXT: .reg .f32 %f<8>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [ld_global_v4f32_param_0];
; CHECK-NEXT: ld.global.nc.v4.f32 {%f1, %f2, %f3, %f4}, [%rd1];
; CHECK-NEXT: add.rn.f32 %f5, %f1, %f2;
; CHECK-NEXT: add.rn.f32 %f6, %f3, %f4;
; CHECK-NEXT: add.rn.f32 %f7, %f5, %f6;
; CHECK-NEXT: st.param.f32 [func_retval0], %f7;
; CHECK-NEXT: ret;
  %vec = load <4 x float>, ptr addrspace(1) %ptr, !invariant.load !0
  %lane0 = extractelement <4 x float> %vec, i32 0
  %lane1 = extractelement <4 x float> %vec, i32 1
  %lane2 = extractelement <4 x float> %vec, i32 2
  %lane3 = extractelement <4 x float> %vec, i32 3
  %pair01 = fadd float %lane0, %lane1
  %pair23 = fadd float %lane2, %lane3
  %total = fadd float %pair01, %pair23
  ret float %total
}
173
+
174
; Invariant <8 x float> load through an addrspace(1) pointer. Only the even
; lanes (0, 2, 4, 6) are extracted and reduced pairwise:
; (lane0 + lane2) + (lane4 + lane6).
; The expected PTX below splits the load into two ld.global.nc.v4.f32
; instructions, the one at [%rd1+16] listed before the one at [%rd1].
define float @ld_global_v8f32(ptr addrspace(1) %ptr) {
; CHECK-LABEL: ld_global_v8f32(
; CHECK: {
; CHECK-NEXT: .reg .f32 %f<12>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [ld_global_v8f32_param_0];
; CHECK-NEXT: ld.global.nc.v4.f32 {%f1, %f2, %f3, %f4}, [%rd1+16];
; CHECK-NEXT: ld.global.nc.v4.f32 {%f5, %f6, %f7, %f8}, [%rd1];
; CHECK-NEXT: add.rn.f32 %f9, %f5, %f7;
; CHECK-NEXT: add.rn.f32 %f10, %f1, %f3;
; CHECK-NEXT: add.rn.f32 %f11, %f9, %f10;
; CHECK-NEXT: st.param.f32 [func_retval0], %f11;
; CHECK-NEXT: ret;
  %vec = load <8 x float>, ptr addrspace(1) %ptr, !invariant.load !0
  %lane0 = extractelement <8 x float> %vec, i32 0
  %lane2 = extractelement <8 x float> %vec, i32 2
  %lane4 = extractelement <8 x float> %vec, i32 4
  %lane6 = extractelement <8 x float> %vec, i32 6
  %pair02 = fadd float %lane0, %lane2
  %pair46 = fadd float %lane4, %lane6
  %total = fadd float %pair02, %pair46
  ret float %total
}
199
+
130
200
define i8 @ld_global_v8i8 (ptr addrspace (1 ) %ptr ) {
131
201
; CHECK-LABEL: ld_global_v8i8(
132
202
; CHECK: {
0 commit comments