@@ -45,7 +45,7 @@ TEST(workspaces, tile_vecElemMul_NoTail) {
45
45
expected.compile ();
46
46
expected.assemble ();
47
47
expected.compute ();
48
- ASSERT_TENSOR_EQ (A, expected );
48
+ ASSERT_TENSOR_EQ (expected, A );
49
49
}
50
50
51
51
TEST (workspaces, tile_vecElemMul_Tail1) {
@@ -83,7 +83,7 @@ TEST(workspaces, tile_vecElemMul_Tail1) {
83
83
expected.compile ();
84
84
expected.assemble ();
85
85
expected.compute ();
86
- ASSERT_TENSOR_EQ (A, expected );
86
+ ASSERT_TENSOR_EQ (expected, A );
87
87
}
88
88
89
89
TEST (workspaces, tile_vecElemMul_Tail2) {
@@ -121,7 +121,7 @@ TEST(workspaces, tile_vecElemMul_Tail2) {
121
121
expected.compile ();
122
122
expected.assemble ();
123
123
expected.compute ();
124
- ASSERT_TENSOR_EQ (A, expected );
124
+ ASSERT_TENSOR_EQ (expected, A );
125
125
126
126
// ir::IRPrinter irp = ir::IRPrinter(cout);
127
127
//
@@ -171,7 +171,7 @@ TEST(workspaces, tile_denseMatMul) {
171
171
expected.compile ();
172
172
expected.assemble ();
173
173
expected.compute ();
174
- ASSERT_TENSOR_EQ (A, expected );
174
+ ASSERT_TENSOR_EQ (expected, A );
175
175
176
176
// ir::IRPrinter irp = ir::IRPrinter(cout);
177
177
//
@@ -218,7 +218,7 @@ TEST(workspaces, precompute2D_add) {
218
218
expected.compile ();
219
219
expected.assemble ();
220
220
expected.compute ();
221
- ASSERT_TENSOR_EQ (A, expected );
221
+ ASSERT_TENSOR_EQ (expected, A );
222
222
223
223
}
224
224
@@ -263,7 +263,7 @@ TEST(workspaces, precompute4D_add) {
263
263
expected.compile ();
264
264
expected.assemble ();
265
265
expected.compute ();
266
- ASSERT_TENSOR_EQ (A, expected );
266
+ ASSERT_TENSOR_EQ (expected, A );
267
267
}
268
268
269
269
TEST (workspaces, precompute4D_multireduce) {
@@ -305,7 +305,7 @@ TEST(workspaces, precompute4D_multireduce) {
305
305
expected.compile ();
306
306
expected.assemble ();
307
307
expected.compute ();
308
- ASSERT_TENSOR_EQ (A, expected );
308
+ ASSERT_TENSOR_EQ (expected, A );
309
309
}
310
310
311
311
TEST (workspaces, precompute3D_TspV) {
@@ -344,7 +344,7 @@ TEST(workspaces, precompute3D_TspV) {
344
344
expected.compile ();
345
345
expected.assemble ();
346
346
expected.compute ();
347
- ASSERT_TENSOR_EQ (A, expected );
347
+ ASSERT_TENSOR_EQ (expected, A );
348
348
349
349
}
350
350
@@ -388,7 +388,7 @@ TEST(workspaces, precompute3D_multipleWS) {
388
388
expected.compile ();
389
389
expected.assemble ();
390
390
expected.compute ();
391
- ASSERT_TENSOR_EQ (A, expected );
391
+ ASSERT_TENSOR_EQ (expected, A );
392
392
393
393
}
394
394
@@ -431,6 +431,178 @@ TEST(workspaces, precompute3D_renamedIVars_TspV) {
431
431
expected.compile ();
432
432
expected.assemble ();
433
433
expected.compute ();
434
- ASSERT_TENSOR_EQ (A, expected );
434
+ ASSERT_TENSOR_EQ (expected, A );
435
435
436
436
}
437
+
438
+ TEST (workspaces, DISABLED_tile_dotProduct_1) { // dot product A() = B(i) * C(i): bound + split by 32 first, then all three precomputes on i1
439
+ // FIXME: Disabled because currently the precompute algorithm does not appropriately
440
+ // find the correct forall substmt to nest the WhereNode in after i has been
441
+ // split into i0 and i1. As an example, the first precompute below is incorrect
442
+ // since it should transform
443
+ // forall(i0, forall(i1, A() += B(i) * C(i))) -->
444
+ // forall(i0, where(forall(i1, A() += ws(i1)), forall(i1, ws(i1) += B(i) * C(i))))
445
+ //
446
+ // But currently the algorithm does
447
+ // forall(i0, forall(i1, A() += B(i) * C(i))) -->
448
+ // where(forall(i1, A() += ws(i1)), forall(i0, forall(i1, ws(i1) += B(i) * C(i))))
449
+
450
+ int N = 1024 ;
451
+ Tensor<double > A (" A" );
452
+ Tensor<double > B (" B" , {N}, Format ({Dense}));
453
+ Tensor<double > C (" C" , {N}, Format ({Dense}));
454
+
455
+ for (int i = 0 ; i < N; i++) { // fill both inputs with B(i) = C(i) = i
456
+ B.insert ({i}, (double ) i);
457
+ C.insert ({i}, (double ) i);
458
+ }
459
+
460
+ B.pack ();
461
+ C.pack ();
462
+
463
+ IndexVar i (" i" );
464
+ IndexVar i_bounded (" i_bounded" );
465
+ IndexVar i0 (" i0" ), i1 (" i1" );
466
+ IndexExpr BExpr = B (i);
467
+ IndexExpr CExpr = C (i);
468
+ IndexExpr precomputedExpr = (BExpr) * (CExpr);
469
+ A () = precomputedExpr;
470
+
471
+ IndexStmt stmt = A.getAssignment ().concretize ();
472
+ TensorVar B_new (" B_new" , Type (Float64, {(size_t )N}), taco::dense); // workspace passed to the BExpr precompute below
473
+ TensorVar C_new (" C_new" , Type (Float64, {(size_t )N}), taco::dense); // workspace passed to the CExpr precompute below
474
+ TensorVar precomputed (" precomputed" , Type (Float64, {(size_t )N}), taco::dense); // workspace passed to the B*C precompute below
475
+
476
+ stmt = stmt.bound (i, i_bounded, (size_t )N, BoundType::MaxExact) // schedule order matters here: bound and split come before any precompute
477
+ .split (i_bounded, i0, i1, 32 );
478
+ stmt = stmt.precompute (precomputedExpr, i1, i1, precomputed); // this is the "first precompute" the FIXME above refers to
479
+ stmt = stmt.precompute (BExpr, i1, i1, B_new)
480
+ .precompute (CExpr, i1, i1, C_new);
481
+
482
+ stmt = stmt.concretize ();
483
+
484
+ A.compile (stmt);
485
+ A.assemble ();
486
+ A.compute ();
487
+
488
+ ir::IRPrinter irp = ir::IRPrinter (cout); // debug output: the statements below print stmt and the lowered IR to cout
489
+
490
+ cout << stmt << endl;
491
+
492
+ std::shared_ptr<ir::CodeGen> codegen = ir::CodeGen::init_default (cout, ir::CodeGen::ImplementationGen);
493
+ ir::Stmt compute = lower (stmt, " compute" , false , true );
494
+
495
+ irp.print (compute);
496
+ cout << endl;
497
+ codegen->compile (compute, false ); // NOTE(review): presumably emits the generated code to cout via codegen — confirm
498
+
499
+ Tensor<double > expected (" expected" ); // reference result, compiled without passing a scheduled stmt
500
+ expected () = B (i) * C (i);
501
+ expected.compile ();
502
+ expected.assemble ();
503
+ expected.compute ();
504
+ ASSERT_TENSOR_EQ (expected, A); // the scheduled result A is compared against the reference
505
+ }
506
+
507
+ TEST (workspaces, DISABLED_tile_dotProduct_2) { // same dot product A() = B(i) * C(i), but precomputes on i first and only then bounds/splits i
508
+ // FIXME: This is also currently disabled since split(...) scheduling commands
509
+ // only split on the FIRST INSTANCE of an indexVar (assumes only one).
510
+ // This is wrong if the indexVar is not renamed across iw_vars since an indexVar can
511
+ // then occur on BOTH the consumer and producer side and should be split across both.
512
+
513
+ int N = 1024 ;
514
+ Tensor<double > A (" A" );
515
+ Tensor<double > B (" B" , {N}, Format ({Dense}));
516
+ Tensor<double > C (" C" , {N}, Format ({Dense}));
517
+
518
+ for (int i = 0 ; i < N; i++) { // fill both inputs with B(i) = C(i) = i
519
+ B.insert ({i}, (double ) i);
520
+ C.insert ({i}, (double ) i);
521
+ }
522
+
523
+ B.pack ();
524
+ C.pack ();
525
+
526
+ IndexVar i (" i" );
527
+ IndexVar i_bounded (" i_bounded" );
528
+ IndexVar i0 (" i0" ), i1 (" i1" );
529
+ IndexExpr BExpr = B (i);
530
+ IndexExpr CExpr = C (i);
531
+ IndexExpr precomputedExpr = (BExpr) * (CExpr);
532
+ A () = precomputedExpr;
533
+
534
+ IndexStmt stmt = A.getAssignment ().concretize ();
535
+ TensorVar B_new (" B_new" , Type (Float64, {(size_t )N}), taco::dense); // workspace passed to the BExpr precompute below
536
+ TensorVar C_new (" C_new" , Type (Float64, {(size_t )N}), taco::dense); // workspace passed to the CExpr precompute below
537
+ TensorVar precomputed (" precomputed" , Type (Float64, {(size_t )N}), taco::dense); // workspace passed to the B*C precompute below
538
+
539
+ stmt = stmt.precompute (precomputedExpr, i, i, precomputed); // i is passed for both index-variable arguments, i.e. it is not renamed (see FIXME)
540
+
541
+ stmt = stmt.precompute (BExpr, i, i, B_new)
542
+ .precompute (CExpr, i, i, C_new);
543
+
544
+ stmt = stmt.bound (i, i_bounded, (size_t )N, BoundType::MaxExact) // bound/split applied after the precomputes — the ordering this test is about
545
+ .split (i_bounded, i0, i1, 32 );
546
+
547
+ stmt = stmt.concretize ();
548
+
549
+ A.compile (stmt);
550
+ A.assemble ();
551
+ A.compute ();
552
+
553
+ Tensor<double > expected (" expected" ); // reference result, compiled without passing a scheduled stmt
554
+ expected () = B (i) * C (i);
555
+ expected.compile ();
556
+ expected.assemble ();
557
+ expected.compute ();
558
+ ASSERT_TENSOR_EQ (expected, A); // the scheduled result A is compared against the reference
559
+ }
560
+
561
+ TEST (workspaces, tile_dotProduct_3) { // enabled variant: bound + split by 32 first, then precompute B*C on i0 and B, C on i1
562
+ int N = 1024 ;
563
+ Tensor<double > A (" A" );
564
+ Tensor<double > B (" B" , {N}, Format ({Dense}));
565
+ Tensor<double > C (" C" , {N}, Format ({Dense}));
566
+
567
+ for (int i = 0 ; i < N; i++) { // fill both inputs with B(i) = C(i) = i
568
+ B.insert ({i}, (double ) i);
569
+ C.insert ({i}, (double ) i);
570
+ }
571
+
572
+ B.pack ();
573
+ C.pack ();
574
+
575
+ IndexVar i (" i" );
576
+ IndexVar i_bounded (" i_bounded" );
577
+ IndexVar i0 (" i0" ), i1 (" i1" );
578
+ IndexExpr BExpr = B (i);
579
+ IndexExpr CExpr = C (i);
580
+ IndexExpr precomputedExpr = (BExpr) * (CExpr);
581
+ A () = precomputedExpr;
582
+
583
+ IndexStmt stmt = A.getAssignment ().concretize ();
584
+ TensorVar B_new (" B_new" , Type (Float64, {(size_t )N}), taco::dense); // workspace passed to the BExpr precompute below
585
+ TensorVar C_new (" C_new" , Type (Float64, {(size_t )N}), taco::dense); // workspace passed to the CExpr precompute below
586
+ TensorVar precomputed (" precomputed" , Type (Float64, {(size_t )N}), taco::dense); // workspace passed to the B*C precompute below
587
+
588
+ stmt = stmt.bound (i, i_bounded, (size_t )N, BoundType::MaxExact) // bound i to N, then split the bounded variable into i0/i1 with factor 32
589
+ .split (i_bounded, i0, i1, 32 );
590
+ stmt = stmt.precompute (precomputedExpr, i0, i0, precomputed); // product is precomputed over i0 here, unlike the disabled _1 test which uses i1
591
+
592
+ stmt = stmt.precompute (BExpr, i1, i1, B_new) // the individual operands are precomputed over i1
593
+ .precompute (CExpr, i1, i1, C_new);
594
+
595
+
596
+ stmt = stmt.concretize ();
597
+
598
+ A.compile (stmt);
599
+ A.assemble ();
600
+ A.compute ();
601
+
602
+ Tensor<double > expected (" expected" ); // reference result, compiled without passing a scheduled stmt
603
+ expected () = B (i) * C (i);
604
+ expected.compile ();
605
+ expected.assemble ();
606
+ expected.compute ();
607
+ ASSERT_TENSOR_EQ (expected, A); // the scheduled result A is compared against the reference
608
+ }
0 commit comments