@@ -1276,6 +1276,9 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
1276
1276
EVT OrigType = N->getValueType (0 );
1277
1277
EVT EltVT = Mem->getMemoryVT ();
1278
1278
unsigned NumElts = 1 ;
1279
+
1280
+ std::optional<unsigned > Opcode;
1281
+
1279
1282
if (EltVT.isVector ()) {
1280
1283
NumElts = EltVT.getVectorNumElements ();
1281
1284
EltVT = EltVT.getVectorElementType ();
@@ -1288,6 +1291,24 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
1288
1291
(EltVT == MVT::i8 && OrigType == MVT::v4i8)) {
1289
1292
assert (NumElts % OrigType.getVectorNumElements () == 0 &&
1290
1293
" NumElts must be divisible by the number of elts in subvectors" );
1294
+ if (N->getOpcode () == ISD::LOAD ||
1295
+ N->getOpcode () == ISD::INTRINSIC_W_CHAIN) {
1296
+ switch (OrigType.getSimpleVT ().SimpleTy ) {
1297
+ case MVT::v2f32:
1298
+ Opcode = N->getOpcode () == ISD::LOAD ? NVPTX::INT_PTX_LDG_GLOBAL_b64
1299
+ : NVPTX::INT_PTX_LDU_GLOBAL_b64;
1300
+ break ;
1301
+ case MVT::v2f16:
1302
+ case MVT::v2bf16:
1303
+ case MVT::v2i16:
1304
+ case MVT::v4i8:
1305
+ Opcode = N->getOpcode () == ISD::LOAD ? NVPTX::INT_PTX_LDG_GLOBAL_b32
1306
+ : NVPTX::INT_PTX_LDU_GLOBAL_b32;
1307
+ break ;
1308
+ default :
1309
+ llvm_unreachable (" Unhandled packed vector type" );
1310
+ }
1311
+ }
1291
1312
EltVT = OrigType;
1292
1313
NumElts /= OrigType.getVectorNumElements ();
1293
1314
}
@@ -1309,50 +1330,51 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
1309
1330
SelectADDR (Op1, Base, Offset);
1310
1331
SDValue Ops[] = {Base, Offset, Chain};
1311
1332
1312
- std::optional<unsigned > Opcode;
1313
- switch (N->getOpcode ()) {
1314
- default :
1315
- return false ;
1316
- case ISD::LOAD:
1317
- Opcode = pickOpcodeForVT (
1318
- EltVT.getSimpleVT ().SimpleTy , NVPTX::INT_PTX_LDG_GLOBAL_i8,
1319
- NVPTX::INT_PTX_LDG_GLOBAL_i16, NVPTX::INT_PTX_LDG_GLOBAL_i32,
1320
- NVPTX::INT_PTX_LDG_GLOBAL_i64, NVPTX::INT_PTX_LDG_GLOBAL_f32,
1321
- NVPTX::INT_PTX_LDG_GLOBAL_f64);
1322
- break ;
1323
- case ISD::INTRINSIC_W_CHAIN:
1324
- Opcode = pickOpcodeForVT (
1325
- EltVT.getSimpleVT ().SimpleTy , NVPTX::INT_PTX_LDU_GLOBAL_i8,
1326
- NVPTX::INT_PTX_LDU_GLOBAL_i16, NVPTX::INT_PTX_LDU_GLOBAL_i32,
1327
- NVPTX::INT_PTX_LDU_GLOBAL_i64, NVPTX::INT_PTX_LDU_GLOBAL_f32,
1328
- NVPTX::INT_PTX_LDU_GLOBAL_f64);
1329
- break ;
1330
- case NVPTXISD::LoadV2:
1331
- Opcode = pickOpcodeForVT (
1332
- EltVT.getSimpleVT ().SimpleTy , NVPTX::INT_PTX_LDG_G_v2i8_ELE,
1333
- NVPTX::INT_PTX_LDG_G_v2i16_ELE, NVPTX::INT_PTX_LDG_G_v2i32_ELE,
1334
- NVPTX::INT_PTX_LDG_G_v2i64_ELE, NVPTX::INT_PTX_LDG_G_v2f32_ELE,
1335
- NVPTX::INT_PTX_LDG_G_v2f64_ELE);
1336
- break ;
1337
- case NVPTXISD::LDUV2:
1338
- Opcode = pickOpcodeForVT (
1339
- EltVT.getSimpleVT ().SimpleTy , NVPTX::INT_PTX_LDU_G_v2i8_ELE,
1340
- NVPTX::INT_PTX_LDU_G_v2i16_ELE, NVPTX::INT_PTX_LDU_G_v2i32_ELE,
1341
- NVPTX::INT_PTX_LDU_G_v2i64_ELE, NVPTX::INT_PTX_LDU_G_v2f32_ELE,
1342
- NVPTX::INT_PTX_LDU_G_v2f64_ELE);
1343
- break ;
1344
- case NVPTXISD::LoadV4:
1345
- Opcode = pickOpcodeForVT (
1346
- EltVT.getSimpleVT ().SimpleTy , NVPTX::INT_PTX_LDG_G_v4i8_ELE,
1347
- NVPTX::INT_PTX_LDG_G_v4i16_ELE, NVPTX::INT_PTX_LDG_G_v4i32_ELE,
1348
- std::nullopt, NVPTX::INT_PTX_LDG_G_v4f32_ELE, std::nullopt);
1349
- break ;
1350
- case NVPTXISD::LDUV4:
1351
- Opcode = pickOpcodeForVT (
1352
- EltVT.getSimpleVT ().SimpleTy , NVPTX::INT_PTX_LDU_G_v4i8_ELE,
1353
- NVPTX::INT_PTX_LDU_G_v4i16_ELE, NVPTX::INT_PTX_LDU_G_v4i32_ELE,
1354
- std::nullopt, NVPTX::INT_PTX_LDU_G_v4f32_ELE, std::nullopt);
1355
- break ;
1333
+ if (!Opcode) {
1334
+ switch (N->getOpcode ()) {
1335
+ default :
1336
+ return false ;
1337
+ case ISD::LOAD:
1338
+ Opcode = pickOpcodeForVT (
1339
+ EltVT.getSimpleVT ().SimpleTy , NVPTX::INT_PTX_LDG_GLOBAL_i8,
1340
+ NVPTX::INT_PTX_LDG_GLOBAL_i16, NVPTX::INT_PTX_LDG_GLOBAL_i32,
1341
+ NVPTX::INT_PTX_LDG_GLOBAL_i64, NVPTX::INT_PTX_LDG_GLOBAL_f32,
1342
+ NVPTX::INT_PTX_LDG_GLOBAL_f64);
1343
+ break ;
1344
+ case ISD::INTRINSIC_W_CHAIN:
1345
+ Opcode = pickOpcodeForVT (
1346
+ EltVT.getSimpleVT ().SimpleTy , NVPTX::INT_PTX_LDU_GLOBAL_i8,
1347
+ NVPTX::INT_PTX_LDU_GLOBAL_i16, NVPTX::INT_PTX_LDU_GLOBAL_i32,
1348
+ NVPTX::INT_PTX_LDU_GLOBAL_i64, NVPTX::INT_PTX_LDU_GLOBAL_f32,
1349
+ NVPTX::INT_PTX_LDU_GLOBAL_f64);
1350
+ break ;
1351
+ case NVPTXISD::LoadV2:
1352
+ Opcode = pickOpcodeForVT (
1353
+ EltVT.getSimpleVT ().SimpleTy , NVPTX::INT_PTX_LDG_G_v2i8_ELE,
1354
+ NVPTX::INT_PTX_LDG_G_v2i16_ELE, NVPTX::INT_PTX_LDG_G_v2i32_ELE,
1355
+ NVPTX::INT_PTX_LDG_G_v2i64_ELE, NVPTX::INT_PTX_LDG_G_v2f32_ELE,
1356
+ NVPTX::INT_PTX_LDG_G_v2f64_ELE);
1357
+ break ;
1358
+ case NVPTXISD::LDUV2:
1359
+ Opcode = pickOpcodeForVT (
1360
+ EltVT.getSimpleVT ().SimpleTy , NVPTX::INT_PTX_LDU_G_v2i8_ELE,
1361
+ NVPTX::INT_PTX_LDU_G_v2i16_ELE, NVPTX::INT_PTX_LDU_G_v2i32_ELE,
1362
+ NVPTX::INT_PTX_LDU_G_v2i64_ELE, NVPTX::INT_PTX_LDU_G_v2f32_ELE,
1363
+ NVPTX::INT_PTX_LDU_G_v2f64_ELE);
1364
+ break ;
1365
+ case NVPTXISD::LoadV4:
1366
+ Opcode = pickOpcodeForVT (
1367
+ EltVT.getSimpleVT ().SimpleTy , NVPTX::INT_PTX_LDG_G_v4i8_ELE,
1368
+ NVPTX::INT_PTX_LDG_G_v4i16_ELE, NVPTX::INT_PTX_LDG_G_v4i32_ELE,
1369
+ std::nullopt, NVPTX::INT_PTX_LDG_G_v4f32_ELE, std::nullopt);
1370
+ break ;
1371
+ case NVPTXISD::LDUV4:
1372
+ Opcode = pickOpcodeForVT (
1373
+ EltVT.getSimpleVT ().SimpleTy , NVPTX::INT_PTX_LDU_G_v4i8_ELE,
1374
+ NVPTX::INT_PTX_LDU_G_v4i16_ELE, NVPTX::INT_PTX_LDU_G_v4i32_ELE,
1375
+ std::nullopt, NVPTX::INT_PTX_LDU_G_v4f32_ELE, std::nullopt);
1376
+ break ;
1377
+ }
1356
1378
}
1357
1379
if (!Opcode)
1358
1380
return false ;
0 commit comments