diff --git a/dash/include/dash/algorithm/Copy.h b/dash/include/dash/algorithm/Copy.h index 368d58ed9..87e5f4732 100644 --- a/dash/include/dash/algorithm/Copy.h +++ b/dash/include/dash/algorithm/Copy.h @@ -1186,7 +1186,6 @@ copy_async( * \ingroup DashAlgorithms */ template < - typename ValueType, class GlobInputIt, class GlobOutputIt > GlobOutputIt copy( @@ -1196,11 +1195,46 @@ GlobOutputIt copy( { DASH_LOG_TRACE("dash::copy()", "blocking, global to global"); + DASH_LOG_TRACE_VAR("dash::copy()", in_first); + DASH_LOG_TRACE_VAR("dash::copy()", in_last); + DASH_LOG_TRACE_VAR("dash::copy()", out_first); + + auto num_elements = dash::distance(in_first, in_last); + auto li_range_in = local_index_range(in_first, in_last); + auto num_local_elem = li_range_in.end - li_range_in.begin; + DASH_LOG_TRACE_VAR("dash::copy()", num_elements); + DASH_LOG_TRACE_VAR("dash::copy()", num_local_elem); + + + // copy our local portion into the global output range + if (num_local_elem > 0) { + auto pattern = in_first.pattern(); + // the distance from the first local element to the in_first iterator + auto in_offset = pattern.global(li_range_in.begin) + - in_first.global().gpos(); + + // the first local element + auto local_in_first = in_first + in_offset; + // the last local element + auto local_in_last = in_first + (num_local_elem + in_offset - 1); + auto local_out_first = out_first + in_offset; + + DASH_LOG_TRACE("Copying from range \n [", + pattern.global(li_range_in.begin), ", ", + pattern.global(li_range_in.end - 1), "] \n [", local_in_first, + "] to \n ", local_out_first, " (global offset ", in_offset, ") "); + + dash::copy( + local_in_first.local(), + // pointer one past the last element + local_in_last.local() + 1, + local_out_first); + } // TODO: // - Implement adapter for local-to-global dash::copy here // - Return if global input range has no local sub-range - return GlobOutputIt(); + return (out_first + num_elements); } #endif // DOXYGEN diff --git a/dash/include/dash/algorithm/Transform.h b/dash/include/dash/algorithm/Transform.h index 4090082e2..f5364557c 100644 --- a/dash/include/dash/algorithm/Transform.h +++ b/dash/include/dash/algorithm/Transform.h @@ -4,6 +4,7 @@ #include #include +#include #include #include #include @@ -48,8 +49,33 @@ inline dart_ret_t transform_blocking_impl( return result; } +/** + * Wrapper of the non-blocking DART accumulate operation with local completion. + * Allows re-use of \c values pointer after the call returns. + */ +template< typename ValueType > +dart_ret_t transform_local_blocking_impl( + dart_gptr_t dest, + ValueType * values, + size_t nvalues, + dart_operation_t op) +{ + static_assert(dash::dart_datatype::value != DART_TYPE_UNDEFINED, + "Cannot accumulate unknown type!"); + + dart_ret_t result = dart_accumulate( + dest, + reinterpret_cast(values), + nvalues, + dash::dart_datatype::value, + op); + dart_flush_local(dest); + return result; +} + /** * Wrapper of the non-blocking DART accumulate operation. + * The pointer \c values should not be re-used before the operation completed. */ template< typename ValueType > dart_ret_t transform_impl( @@ -67,7 +93,6 @@ dart_ret_t transform_impl( nvalues, dash::dart_datatype::value, op); - dart_flush_local(dest); return result; } @@ -271,40 +296,65 @@ GlobOutputIt transform( BinaryOperation binary_op) { DASH_LOG_DEBUG("dash::transform(af, al, bf, outf, binop)"); + auto &pattern = out_first.pattern(); // Outut range different from rhs input range is not supported yet - auto in_first = in_a_first; - auto in_last = in_a_last; - std::vector in_range; + ValueType* in_first = &(*in_a_first); + ValueType* in_last = &(*in_a_last); + // Number of elements in local range: + size_t num_local_elements = std::distance(in_first, in_last); + auto out_last = out_first + num_local_elements; + if (out_last.gpos() > pattern.size()) { + DASH_THROW(dash::exception::OutOfRange, + "Too many input elements in dash::transform"); + } if (in_b_first == out_first) { // Output range is rhs input range: C += A // Input is (in_a_first, in_a_last). } else { // Output range different from rhs input range: C = A+B // Input is (in_a_first, in_a_last) + (in_b_first, in_b_last): - std::transform( - in_a_first, in_a_last, + dash::copy( in_b_first, - std::back_inserter(in_range), - binary_op); - in_first = in_range.data(); - in_last = in_first + in_range.size(); + in_b_first + std::distance(in_a_first, in_a_last), + out_first); } dash::util::Trace trace("transform"); - // Resolve local range from global range: - // Number of elements in local range: - size_t num_local_elements = std::distance(in_first, in_last); // Global iterator to dart_gptr_t: dart_gptr_t dest_gptr = out_first.dart_gptr(); // Send accumulate message: - trace.enter_state("transform_blocking"); - dash::internal::transform_blocking_impl( + auto &team = pattern.team(); + size_t towrite = num_local_elements; + auto out_it = out_first; + auto in_it = in_first; + while (towrite > 0) { + auto lpos = out_it.lpos(); + size_t lsize = pattern.local_size(lpos.unit); + size_t num_values = std::min(lsize - lpos.index, towrite); + dart_gptr_t dest_gptr = out_it.dart_gptr(); + // use non-blocking transform and wait for all at the end + dash::internal::transform_impl( dest_gptr, - in_first, - num_local_elements, + in_it, + num_values, binary_op.dart_operation()); - trace.exit_state("transform_blocking"); + out_it += num_values; + in_it += num_values; + towrite -= num_values; + } + +// out_first.team().barrier(); + dart_flush_all(out_first.dart_gptr()); + + +// trace.enter_state("transform_blocking"); +// dash::internal::transform_blocking_impl( +// dest_gptr, +// in_first, +// num_local_elements, +// binary_op.dart_operation()); +// trace.exit_state("transform_blocking"); // The position past the last element transformed in global element space // cannot be resolved from the size of the local range if the local range // spans over more than one block. Otherwise, the difference of two global @@ -320,7 +370,7 @@ GlobOutputIt transform( // For ranges over block borders, we would have to resolve the global // position past the last element transformed from the iterator's pattern // (see dash::PatternIterator). - return out_first + num_local_elements; + return out_it; } /** diff --git a/dash/include/dash/iterator/GlobViewIter.h b/dash/include/dash/iterator/GlobViewIter.h index e568719aa..8cd19022a 100644 --- a/dash/include/dash/iterator/GlobViewIter.h +++ b/dash/include/dash/iterator/GlobViewIter.h @@ -1091,10 +1091,10 @@ std::ostream & operator<<( ElementType, Pattern, GlobStaticMem, Pointer, Reference> & it) { std::ostringstream ss; - dash::GlobPtr ptr(it); +// dash::GlobPtr ptr(it); ss << "dash::GlobViewIter<" << typeid(ElementType).name() << ">(" << "idx:" << it._idx << ", " - << "gptr:" << ptr << ")"; + << "gptr:" << it.global().dart_gptr() << ")"; return operator<<(os, ss.str()); } diff --git a/dash/test/algorithm/CopyTest.cc b/dash/test/algorithm/CopyTest.cc index c27b60f70..3b00107eb 100644 --- a/dash/test/algorithm/CopyTest.cc +++ b/dash/test/algorithm/CopyTest.cc @@ -5,6 +5,9 @@ #include #include +#include +#include +#include #include #include #include @@ -803,6 +806,54 @@ TEST_F(CopyTest, AsyncGlobalToLocalBlock) } } + +TEST_F(CopyTest, GlobalToGlobal) +{ + using value_t = int; + constexpr int elem_per_unit = 100; + dash::Array source(dash::size() * elem_per_unit); + dash::Array target(dash::size() * elem_per_unit); + + dash::fill(target.begin(), target.end(), 0); + dash::generate_with_index(source.begin(), source.end(), + [](size_t idx) { + return dash::myid() * 1000 + idx; + } + ); + + source.barrier(); + + // copy the full range + dash::copy(source.begin(), source.end(), target.begin()); + source.barrier(); + + dash::for_each_with_index(target.begin(), target.end(), + [](value_t val, size_t idx) { + ASSERT_EQ_U(val, dash::myid() * 1000 + idx); + } + ); + + // copy the range with an offset (effectively moving the input + // range to the left by 1) + dash::copy(source.begin() + 1, source.end(), target.begin()); + source.barrier(); + + dash::for_each_with_index(target.begin(), target.end() - 1, + [](value_t val, size_t idx) { + std::cout << idx << ": " << val << std::endl; + // the array has shifted so the last element is different + if ((idx % elem_per_unit) == (elem_per_unit - 1)) { + // the last element comes from the next unit + // this element has not been copied on the last unit + ASSERT_EQ_U(val, (dash::myid() + 1) * 1000 + idx + 1); + } else { + ASSERT_EQ_U(val, dash::myid() * 1000 + idx + 1); + } + } + ); + +} + #if 0 // TODO TEST_F(CopyTest, AsyncAllToLocalVector) diff --git a/dash/test/algorithm/TransformTest.cc b/dash/test/algorithm/TransformTest.cc index 4a9d6e4c9..85370ec0c 100644 --- a/dash/test/algorithm/TransformTest.cc +++ b/dash/test/algorithm/TransformTest.cc @@ -3,6 +3,8 @@ #include #include +#include +#include #include #include @@ -221,3 +223,41 @@ TEST_F(TransformTest, MatrixGlobalPlusGlobalBlocking) EXPECT_EQ_U(first_l_block_a_begin, first_l_block_a_offsets); } + + +TEST_F(TransformTest, LocalIteratorInput) +{ + using value_t = int; + std::vector local_v(100); + size_t idx = 0; + std::fill(local_v.begin(), local_v.end(), (value_t)dash::myid()); + for (auto& elem : local_v) { + elem = dash::myid() * 1000 + idx; + idx++; + } + dash::Array global_v(local_v.size() + 1); + dash::fill(global_v.begin(), global_v.end(), 0.0); + global_v.barrier(); + // start from the second element + auto it = dash::transform( + local_v.begin(), + local_v.end(), + global_v.begin() + 1, + global_v.begin() + 1, + dash::max() + ); + + global_v.barrier(); + + ASSERT_EQ_U(it, global_v.end()); + +// size_t idx = 0; + + dash::for_each_with_index(global_v.begin() + 1, global_v.end(), + [](value_t val, size_t idx){ + ASSERT_EQ_U(val, (dash::size() - 1) * 1000 + (idx - 1)); + ++idx; + }); + + global_v.barrier(); +} diff --git a/dash/test/container/MatrixTest.cc b/dash/test/container/MatrixTest.cc index 10f609986..f99603ecc 100644 --- a/dash/test/container/MatrixTest.cc +++ b/dash/test/container/MatrixTest.cc @@ -721,28 +721,39 @@ TEST_F(MatrixTest, BlockCopy) dash::Team::All(), team_spec); // Fill matrix + auto block_a = matrix_a.block(1); + auto block_b = matrix_b.block(0); if (myid == 0) { LOG_MESSAGE("Assigning matrix values"); - for(size_t col = 0; col < matrix_a.extent(0); ++col) { - for(size_t row = 0; row < matrix_a.extent(1); ++row) { - auto value = (row * matrix_a.extent(0)) + col; - matrix_a[col][row] = value; - matrix_b[col][row] = value; + for(size_t row = 0; row < matrix_a.extent(0); ++row) { + for(size_t col = 0; col < matrix_a.extent(1); ++col) { + auto value = (row * 1000) + col; + matrix_a[row][col] = value; + matrix_b[row][col] = value; } } } - LOG_MESSAGE("Wait for team barrier ..."); - dash::barrier(); - LOG_MESSAGE("Team barrier passed"); + + matrix_b.barrier(); + + LOG_MESSAGE("Copying block"); // Copy block 1 of matrix_a to block 0 of matrix_b: - dash::copy(matrix_a.block(1).begin(), - matrix_a.block(1).end(), - matrix_b.block(0).begin()); + dash::copy(block_a.begin(), + block_a.end(), + block_b.begin()); + matrix_b.barrier(); - LOG_MESSAGE("Wait for team barrier ..."); - dash::barrier(); - LOG_MESSAGE("Team barrier passed"); + LOG_MESSAGE("Checking copy result"); + if (myid == 0) { + LOG_MESSAGE("Checking copied matrix block values"); + for(size_t col = 0; col < block_a.extent(0); ++col) { + for(size_t row = 0; row < block_a.extent(1); ++row) { + ASSERT_EQ_U(static_cast(block_b[col][row]), + static_cast(block_a[col][row])); + } + } + } } TEST_F(MatrixTest, StorageOrder)