diff --git a/Makefile b/Makefile index c34823ab5a..b7d1b9f3dd 100644 --- a/Makefile +++ b/Makefile @@ -759,6 +759,15 @@ endif LIBS += $(NCLIB) endif +ifneq "$(SCOTCH)" "" + SCOTCH_FCINCLUDES += -I$(SCOTCH)/src/include + SCOTCH_LIBS += -L$(SCOTCH)/lib -lscotch -lscotcherr + SCOTCH_FFLAGS = -DMPAS_SCOTCH + + FCINCLUDES += $(SCOTCH_FCINCLUDES) + LIBS += $(SCOTCH_LIBS) + override CPPFLAGS += $(SCOTCH_FFLAGS) +endif ifneq "$(PNETCDF)" "" ifneq ($(wildcard $(PNETCDF)/lib/libpnetcdf.*), ) @@ -1415,6 +1424,33 @@ musica_fortran_test: $(eval MUSICA_FORTRAN_VERSION := $(shell pkg-config --modversion musica-fortran)) $(if $(findstring 1,$(MUSICA_FORTRAN_TEST)), $(info Built a simple test program with MUSICA-Fortran version $(MUSICA_FORTRAN_VERSION)), ) +scotch_fortran_test: + @# + @# Create a Fortran test program that will link against the SCOTCH library + @# + $(info Checking for a working MUSICA-Fortran library...) + $(eval SCOTCH_FORTRAN_TEST := $(shell $\ + printf "program test_scotch_fortran\n$\ + & include \"scotchf.h\"\n$\ + & doubleprecision :: scotchgraph (scotch_graphdim)\n$\ + & integer :: ierr\n$\ + & ierr = 0\n$\ + & call scotchfgraphinit(scotchgraph (1), ierr)\n$\ + & call scotchfgraphexit(scotchgraph(1))\n$\ + end program test_scotch_fortran\n" | sed 's/&/ /' > test_scotch_fortran.f90; $\ + $\ + $(FC) $(SCOTCH_FCINCLUDES) $(SCOTCH_FFLAGS) test_scotch_fortran.f90 -o test_scotch_fortran.x $(SCOTCH_LIBS) > /dev/null 2>&1; $\ + scotch_fortran_status=$$?; $\ + rm -f test_scotch_fortran.f90 test_scotch_fortran.x; $\ + if [ $$scotch_fortran_status -eq 0 ]; then $\ + printf "1"; $\ + else $\ + printf "0"; $\ + fi $\ + )) + $(if $(findstring 0,$(SCOTCH_FORTRAN_TEST)), $(error Could not build a simple test program with Scotch)) + $(if $(findstring 1,$(SCOTCH_FORTRAN_TEST)), $(info Built a simple test program with Scotch )) + pnetcdf_test: @# @# Create test C programs that look for PNetCDF header file and some symbols in it @@ -1471,6 +1507,13 @@ else MUSICA_MESSAGE = "MPAS was not linked with the MUSICA-Fortran library." endif +ifneq "$(SCOTCH_FFLAGS)" "" +MAIN_DEPS += scotch_fortran_test +SCOTCH_MESSAGE = "MPAS has been linked with the Scotch graph partitioning library." +else +SCOTCH_MESSAGE = "MPAS was NOT linked with the Scotch graph partitioning library." +endif + mpas_main: $(MAIN_DEPS) cd src; $(MAKE) FC="$(FC)" \ CC="$(CC)" \ @@ -1508,6 +1551,7 @@ mpas_main: $(MAIN_DEPS) @echo $(OPENMP_OFFLOAD_MESSAGE) @echo $(OPENACC_MESSAGE) @echo $(MUSICA_MESSAGE) + @echo $(SCOTCH_MESSAGE) @echo $(SHAREDLIB_MESSAGE) ifeq "$(AUTOCLEAN)" "true" @echo $(AUTOCLEAN_MESSAGE) diff --git a/src/core_atmosphere/Registry.xml b/src/core_atmosphere/Registry.xml index bdf47ec7b4..ade3916a16 100644 --- a/src/core_atmosphere/Registry.xml +++ b/src/core_atmosphere/Registry.xml @@ -319,7 +319,7 @@ - diff --git a/src/framework/mpas_block_decomp.F b/src/framework/mpas_block_decomp.F index 4f3d197d5d..e26230f2d9 100644 --- a/src/framework/mpas_block_decomp.F +++ b/src/framework/mpas_block_decomp.F @@ -25,6 +25,10 @@ module mpas_block_decomp use mpas_derived_types use mpas_io_units use mpas_log + + #ifdef MPAS_SCOTCH + include "scotchf.h" + #endif type graph integer :: nVerticesTotal @@ -49,7 +53,7 @@ module mpas_block_decomp ! !----------------------------------------------------------------------- subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, local_cell_list, block_id, block_start, & - block_count, numBlocks, explicitProcDecomp, blockFilePrefix, procFilePrefix)!{{{ + block_count, cellsOnCellFull, nEdgesOnCellFull, numBlocks, explicitProcDecomp, blockFilePrefix, procFilePrefix)!{{{ implicit none @@ -59,6 +63,8 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l integer, dimension(:), pointer :: block_id !< Output: list of global block id's this processor owns integer, dimension(:), pointer :: block_start !< Output: offset in local_cell_list for this blocks list of cells integer, dimension(:), pointer :: block_count !< Output: number of cells in blocks + type (field2dInteger), intent(in) :: cellsOnCellFull + type (field1dInteger), intent(in) :: nEdgesOnCellFull integer, intent(in) :: numBlocks !< Input: Number of blocks (from config_num_blocks) logical, intent(in) :: explicitProcDecomp !< Input: Logical flag controlling if blocks are explicitly assigned to processors @@ -71,12 +77,21 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l integer, dimension(:), allocatable :: local_block_list integer, dimension(:,:), allocatable :: sorted_local_cell_list - integer :: i, global_block_id, local_block_id, owning_proc, iunit, istatus - integer :: blocks_per_proc, err + integer, dimension(:), allocatable :: global_block_id_arr, owning_proc_arr + integer :: i, global_block_id, local_block_id, owning_proc, iunit, ounit, istatus, ostatus, j, k + integer :: blocks_per_proc, err, ierr integer, dimension(:), pointer :: local_nvertices - character (len=StrKIND) :: filename + character (len=StrKIND) :: filename, msg logical :: no_blocks + integer :: nTotalEdgesGraph = 0 + integer, dimension(:), allocatable :: edgetab, verttab, parttab + + logical :: useScotch +#ifdef MPAS_SCOTCH + doubleprecision :: stradat (scotch_stratdim) + doubleprecision :: scotchgraph (scotch_graphdim) +#endif no_blocks = .false. @@ -95,71 +110,195 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l allocate(local_nvertices(dminfo % nprocs)) allocate(global_start(dminfo % nprocs)) allocate(global_list(partial_global_graph_info % nVerticesTotal)) + allocate(global_block_id_arr(partial_global_graph_info % nVerticesTotal)) + allocate(owning_proc_arr(partial_global_graph_info % nVerticesTotal)) if (dminfo % my_proc_id == IO_NODE) then + useScotch = .false. + if ( trim(blockFilePrefix) == '' ) then + call mpas_log_write('Namelist option config_block_decomp_file_prefix is set to \'\' ', MPAS_LOG_ERR) +#ifdef MPAS_SCOTCH + useScotch = .true. +#else + call mpas_log_write('Either build MPAS with the Scotch library or provide a valid file prefix for config_block_decomp_file_prefix', MPAS_LOG_CRIT) +#endif + else + if (dminfo % total_blocks < 10) then + write(filename,'(a,i1)') trim(blockFilePrefix), dminfo % total_blocks + else if (dminfo % total_blocks < 100) then + write(filename,'(a,i2)') trim(blockFilePrefix), dminfo % total_blocks + else if (dminfo % total_blocks < 1000) then + write(filename,'(a,i3)') trim(blockFilePrefix), dminfo % total_blocks + else if (dminfo % total_blocks < 10000) then + write(filename,'(a,i4)') trim(blockFilePrefix), dminfo % total_blocks + else if (dminfo % total_blocks < 100000) then + write(filename,'(a,i5)') trim(blockFilePrefix), dminfo % total_blocks + else if (dminfo % total_blocks < 1000000) then + write(filename,'(a,i6)') trim(blockFilePrefix), dminfo % total_blocks + else if (dminfo % total_blocks < 10000000) then + write(filename,'(a,i7)') trim(blockFilePrefix), dminfo % total_blocks + else if (dminfo % total_blocks < 100000000) then + write(filename,'(a,i8)') trim(blockFilePrefix), dminfo % total_blocks + end if - if (dminfo % total_blocks < 10) then - write(filename,'(a,i1)') trim(blockFilePrefix), dminfo % total_blocks - else if (dminfo % total_blocks < 100) then - write(filename,'(a,i2)') trim(blockFilePrefix), dminfo % total_blocks - else if (dminfo % total_blocks < 1000) then - write(filename,'(a,i3)') trim(blockFilePrefix), dminfo % total_blocks - else if (dminfo % total_blocks < 10000) then - write(filename,'(a,i4)') trim(blockFilePrefix), dminfo % total_blocks - else if (dminfo % total_blocks < 100000) then - write(filename,'(a,i5)') trim(blockFilePrefix), dminfo % total_blocks - else if (dminfo % total_blocks < 1000000) then - write(filename,'(a,i6)') trim(blockFilePrefix), dminfo % total_blocks - else if (dminfo % total_blocks < 10000000) then - write(filename,'(a,i7)') trim(blockFilePrefix), dminfo % total_blocks - else if (dminfo % total_blocks < 100000000) then - write(filename,'(a,i8)') trim(blockFilePrefix), dminfo % total_blocks - end if - - call mpas_new_unit(iunit) - open(unit=iunit, file=trim(filename), form='formatted', status='old', iostat=istatus) - - if (istatus /= 0) then - call mpas_log_write('Could not open block decomposition file for $i blocks.', MPAS_LOG_ERR, intArgs=(/dminfo % total_blocks/) ) - call mpas_log_write('Filename: '//trim(filename), MPAS_LOG_CRIT) - end if + call mpas_new_unit(iunit) + open(unit=iunit, file=trim(filename), form='formatted', status='old', iostat=istatus) + + if (istatus /= 0) then + call mpas_log_write('Could not open block decomposition file for $i blocks.', MPAS_LOG_WARN, intArgs=(/dminfo % total_blocks/) ) + call mpas_log_write('Filename: '//trim(filename),MPAS_LOG_WARN) +#ifdef MPAS_SCOTCH + useScotch = .true. +#else + call mpas_log_write('Either build MPAS with Scotch library or provide a valid file prefix for config_block_decomp_file_prefix', MPAS_LOG_CRIT) +#endif + end if + end if - local_nvertices(:) = 0 - do i=1,partial_global_graph_info % nVerticesTotal - read(unit=iunit, fmt=*, iostat=err) global_block_id - - if ( err .ne. 0 ) then - call mpas_log_write('Decomoposition file: ' // trim(filename) // ' contains less than $i cells', & - MPAS_LOG_CRIT, intArgs=(/partial_global_graph_info % nVerticesTotal/) ) - end if - call mpas_get_owning_proc(dminfo, global_block_id, owning_proc) - local_nvertices(owning_proc+1) = local_nvertices(owning_proc+1) + 1 - end do + if (useScotch) then +#ifdef MPAS_SCOTCH + call mpas_log_write('Using LibScotch for graph partitioning') + do i=1,partial_global_graph_info % nVerticesTotal + do j=1,nEdgesOnCellFull% array(i) + + if (cellsOnCellFull%array(j,i) == 0) cycle + nTotalEdgesGraph = nTotalEdgesGraph + 1 + !nTotalEdgesGraph = nTotalEdgesGraph + nEdgesOnCellFull% array(i) + ! do j=1,nEdgesOnCellFull% array(i) + ! call mpas_log_write('i=$i j=$i adj= $i', intArgs=(/i,j,cellsOnCellFull%array(j,i)/) ) + ! end do + end do + end do + call mpas_log_write('nTotalEdgesGraph is $i', intArgs=(/nTotalEdgesGraph/)) + allocate(edgetab(nTotalEdgesGraph)) + allocate(verttab(partial_global_graph_info % nVerticesTotal + 1)) + !allocate(parttab(partial_global_graph_info % nVerticesTotal)) + + !do i=1,partial_global_graph_info % nVerticesTotal + !call mpas_log_write('proc=$i i= $i part= $i', intArgs=(/dminfo % my_proc_id, i,parttab(i)/)) + !call mpas_log_write('i=$i j=$i adj= $i', intArgs=(/i,j,cellsOnCellFull%array(j,i)/) ) + !end do + + k = 1 + do i=1,partial_global_graph_info % nVerticesTotal + verttab(i) = k + !call mpas_log_write('i=$i verttab= $i', intArgs=(/i,verttab(i)/) ) + do j=1,nEdgesOnCellFull% array(i) + + if (cellsOnCellFull%array(j,i) == 0) cycle + + edgetab(k) = cellsOnCellFull%array(j,i) + !call mpas_log_write('k=$i edgetab= $i', intArgs=(/k,edgetab(k)/) ) + k = k + 1 + end do + end do + verttab(partial_global_graph_info % nVerticesTotal+1) = nTotalEdgesGraph + 1 + + !call mpas_log_write('nvertices =$i nTotalEdgesGraph= $i', intArgs=(/partial_global_graph_info % nVerticesTotal, nTotalEdgesGraph/)) + + call scotchfstratinit (stradat (1), ierr) + call scotchfgraphinit (scotchgraph (1), ierr) - read(unit=iunit, fmt=*, iostat=err) + if (ierr .ne. 0) then + call mpas_log_write('Cannot initialize Scotch Graph', MPAS_LOG_CRIT) + endif + + CALL scotchfgraphbuild (scotchgraph (1), 1, partial_global_graph_info % nVerticesTotal, & + verttab (1), verttab (2), & + verttab (1), verttab (1), & + nTotalEdgesGraph, edgetab (1), edgetab (1), ierr) + if (ierr .ne. 0) then + call mpas_log_write('Cannot build Scotch Graph', MPAS_LOG_CRIT) + endif + + ! Only needed during development/debugging. + !call scotchfgraphcheck (scotchgraph (1), ierr) + !if (ierr .ne. 0) then + ! call mpas_log_write('Cannot check Scotch Graph', MPAS_LOG_CRIT) + !endif + + call scotchfgraphpart( scotchgraph (1), dminfo % total_blocks, stradat (1) ,global_block_id_arr(1), IERR) + + call scotchfgraphexit (SCOTCHGRAPH (1)) + call scotchfstratexit (stradat (1)) + + local_nvertices(:) = 0 + do i=1,partial_global_graph_info % nVerticesTotal + !global_block_id_arr(i) = parttab(i) + call mpas_get_owning_proc(dminfo, global_block_id_arr(i), owning_proc) + owning_proc_arr(i) = owning_proc + !call mpas_log_write('vert: $i, global_block_id: $i, owning_proc: $i ', intArgs=(/i,global_block_id_arr(i),owning_proc/) ) + local_nvertices(owning_proc+1) = local_nvertices(owning_proc+1) + 1 + end do + + if (istatus /= 0) then + call mpas_log_write('Writing out Scotch Graph paritioning data to '//trim(filename)) + call mpas_new_unit(ounit) + open(unit=ounit, file=trim(filename), form='formatted', status='new', action="write", iostat=ostatus) + do i=1,partial_global_graph_info % nVerticesTotal + write(unit=ounit, fmt='(i0)', iostat=err) global_block_id_arr(i) + !write(unit=ounit, fmt='(*(i0,1x))', iostat=err) global_block_id_arr(i) + end do + close(unit=ounit) + call mpas_release_unit(ounit) + end if + #endif + else + + call mpas_log_write('Using block decomposition file: '//trim(filename)) + + call mpas_log_write('First read pass ') + local_nvertices(:) = 0 + do i=1,partial_global_graph_info % nVerticesTotal + read(unit=iunit, fmt=*, iostat=err) global_block_id + global_block_id_arr(i) = global_block_id + + if ( err .ne. 0 ) then + call mpas_log_write('Decomoposition file: ' // trim(filename) // ' contains less than $i cells', & + MPAS_LOG_CRIT, intArgs=(/partial_global_graph_info % nVerticesTotal/) ) + end if + call mpas_get_owning_proc(dminfo, global_block_id, owning_proc) + owning_proc_arr(i) = owning_proc + !call mpas_log_write('vert: $i, global_block_id: $i, owning_proc: $i ', intArgs=(/i,global_block_id,owning_proc/) ) + local_nvertices(owning_proc+1) = local_nvertices(owning_proc+1) + 1 + !call mpas_log_write('owning_proc+1: $i local_nvertices= $i', intArgs=(/owning_proc+1, local_nvertices(owning_proc+1)/)) + end do + + read(unit=iunit, fmt=*, iostat=err) + + if ( err == 0 ) then + call mpas_log_write('Decomposition file: ' // trim(filename) // ' contains more than $i cells', & + MPAS_LOG_CRIT, intArgs=(/partial_global_graph_info % nVerticesTotal/) ) + end if - if ( err == 0 ) then - call mpas_log_write('Decomposition file: ' // trim(filename) // ' contains more than $i cells', & - MPAS_LOG_CRIT, intArgs=(/partial_global_graph_info % nVerticesTotal/) ) + close(unit=iunit) + call mpas_release_unit(iunit) end if global_start(1) = 1 do i=2,dminfo % nprocs global_start(i) = global_start(i-1) + local_nvertices(i-1) + !call mpas_log_write('i: $i global_start= $i', intArgs=(/i, global_start(i)/)) end do - rewind(unit=iunit) + !rewind(unit=iunit) + call mpas_log_write('Second read pass ') do i=1,partial_global_graph_info % nVerticesTotal - read(unit=iunit, fmt=*, iostat=err) global_block_id - call mpas_get_owning_proc(dminfo, global_block_id, owning_proc) + !read(unit=iunit, fmt=*, iostat=err) global_block_id + !call mpas_get_owning_proc(dminfo, global_block_id, owning_proc) + global_block_id = global_block_id_arr(i) + owning_proc = owning_proc_arr(i) global_list(global_start(owning_proc+1)) = i + !call mpas_log_write('owning_proc+1: $i global_start= $i global_list=$i', intArgs=(/owning_proc+1, global_start(owning_proc+1), global_list(global_start(owning_proc+1))/)) global_start(owning_proc+1) = global_start(owning_proc+1) + 1 + !call mpas_log_write('global_start(owning_proc+1): $i', intArgs=(/global_start(owning_proc+1)/)) end do global_start(1) = 0 do i=2,dminfo % nprocs global_start(i) = global_start(i-1) + local_nvertices(i-1) + !call mpas_log_write('i: $i global_start= $i', intArgs=(/i, global_start(i)/)) end do call mpas_dmpar_bcast_ints(dminfo, dminfo % nprocs, local_nvertices) @@ -173,28 +312,34 @@ subroutine mpas_block_decomp_cells_for_proc(dminfo, partial_global_graph_info, l global_start(1) = 1 do i=2,dminfo % nprocs global_start(i) = global_start(i-1) + local_nvertices(i-1) + !call mpas_log_write('i: $i global_start= $i', intArgs=(/i, global_start(i)/)) end do - rewind(unit=iunit) + !rewind(unit=iunit) + call mpas_log_write('Third read pass ') do i=1,partial_global_graph_info % nVerticesTotal - read(unit=iunit, fmt=*) global_block_id - call mpas_get_owning_proc(dminfo, global_block_id, owning_proc) + !read(unit=iunit, fmt=*) global_block_id + !call mpas_get_owning_proc(dminfo, global_block_id, owning_proc) + global_block_id = global_block_id_arr(i) + owning_proc = owning_proc_arr(i) global_list(global_start(owning_proc+1)) = global_block_id + !call mpas_log_write('owning_proc+1: $i global_start= $i global_list=$i', intArgs=(/owning_proc+1, global_start(owning_proc+1), global_list(global_start(owning_proc+1))/)) global_start(owning_proc+1) = global_start(owning_proc+1) + 1 + !call mpas_log_write('global_start(owning_proc+1): $i', intArgs=(/global_start(owning_proc+1)/)) end do ! Recompute global start after second read of global_block_list global_start(1) = 0 do i=2,dminfo % nprocs global_start(i) = global_start(i-1) + local_nvertices(i-1) + !call mpas_log_write('i: $i global_start= $i', intArgs=(/i, global_start(i)/)) end do call mpas_dmpar_scatter_ints(dminfo, dminfo % nprocs, local_nvertices(dminfo % my_proc_id + 1), & global_start, local_nvertices, global_list, local_block_list) - close(unit=iunit) - call mpas_release_unit(iunit) + else diff --git a/src/framework/mpas_bootstrapping.F b/src/framework/mpas_bootstrapping.F index 4241255e2a..f444032e3b 100644 --- a/src/framework/mpas_bootstrapping.F +++ b/src/framework/mpas_bootstrapping.F @@ -106,8 +106,8 @@ subroutine mpas_bootstrap_framework_phase1(domain, mesh_filename, mesh_iotype, p type (field1dInteger), pointer :: indexToCellIDField type (field1dInteger), pointer :: indexToEdgeIDField type (field1dInteger), pointer :: indexToVertexIDField - type (field1dInteger), pointer :: nEdgesOnCellField - type (field2dInteger), pointer :: cellsOnCellField + type (field1dInteger), pointer :: nEdgesOnCellField, nEdgesOnCellFull + type (field2dInteger), pointer :: cellsOnCellField, cellsOnCellFull type (field2dInteger), pointer :: edgesOnCellField type (field2dInteger), pointer :: verticesOnCellField type (field2dInteger), pointer :: cellsOnEdgeField @@ -144,7 +144,7 @@ subroutine mpas_bootstrap_framework_phase1(domain, mesh_filename, mesh_iotype, p integer, pointer :: config_num_halos, config_number_of_blocks logical, pointer :: config_explicit_proc_decomp character (len=StrKIND), pointer :: config_block_decomp_file_prefix, config_proc_decomp_file_prefix - integer :: nHalos + integer :: nHalos, j, vind call mpas_pool_get_config(domain % configs, 'config_num_halos', config_num_halos) @@ -197,6 +197,16 @@ subroutine mpas_bootstrap_framework_phase1(domain, mesh_filename, mesh_iotype, p ! which cells/edges/vertices are owned by each block, and which are ghost ! + allocate(cellsOnCellFull) + allocate(cellsOnCellFull % array(maxEdges,nCells)) + call MPAS_io_inq_var(inputHandle, 'cellsOnCell', ierr=ierr) + call mpas_io_get_var(inputHandle, 'cellsOnCell', cellsOnCellFull % array, ierr) + + allocate(nEdgesOnCellFull) + allocate(nEdgesOnCellFull % array(nCells)) + call MPAS_io_inq_var(inputHandle, 'nEdgesOnCell', ierr=ierr) + call mpas_io_get_var(inputHandle, 'nEdgesOnCell', nEdgesOnCellFull % array, ierr) + call mpas_io_setup_cell_block_fields(inputHandle, nreadCells, readCellStart, readingBlock, maxEdges, & indexTocellIDField, xCellField, yCellField, zCellField, nEdgesOnCellField, & cellsOnCellField, edgesOnCellField, verticesOnCellField, nHalos) @@ -235,7 +245,7 @@ subroutine mpas_bootstrap_framework_phase1(domain, mesh_filename, mesh_iotype, p ! file, but in principle it could call some online, distributed mesh partitioning library. ! call mpas_block_decomp_cells_for_proc(domain % dminfo, partial_global_graph_info, local_cell_list, block_id, block_start, & - block_count, config_number_of_blocks, config_explicit_proc_decomp, & + block_count, cellsOnCellFull, nEdgesOnCellFull, config_number_of_blocks, config_explicit_proc_decomp, & config_block_decomp_file_prefix, config_proc_decomp_file_prefix) deallocate(partial_global_graph_info % vertexID)