Skip to content

ENH: add runtime library support for MODMEM_USAGE config setting #965

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions darshan-runtime/doc/darshan-runtime.txt
Original file line number Diff line number Diff line change
Expand Up @@ -503,6 +503,8 @@ The Darshan library honors the following settings to modify behavior at runtime:
| DARSHAN_INTERNAL_TIMING=1 | INTERNAL_TIMING
| Enables internal instrumentation that will print the time required
to start up and shut down Darshan to stderr at runtime.
| DARSHAN_MODMEM_USAGE=1 | MODMEM_USAGE
| Prints details on memory usage of Darshan's instrumentation modules.
| DARSHAN_MODMEM=<val> | MODMEM <val>
| Specifies the amount of memory (in MiB) Darshan instrumentation
modules can collectively consume (if not specified, a default 4 MiB
Expand Down
4 changes: 4 additions & 0 deletions darshan-runtime/lib/darshan-config.c
Original file line number Diff line number Diff line change
Expand Up @@ -441,6 +441,8 @@ void darshan_parse_config_env(struct darshan_config *cfg)
cfg->dump_config_flag = 1;
if(getenv("DARSHAN_INTERNAL_TIMING"))
cfg->internal_timing_flag = 1;
if(getenv("DARSHAN_MODMEM_USAGE"))
cfg->mod_mem_usage_flag = 1;
if(getenv("DARSHAN_DISABLE_SHARED_REDUCTION"))
cfg->disable_shared_redux_flag = 1;

Expand Down Expand Up @@ -814,6 +816,8 @@ void darshan_parse_config_file(struct darshan_config *cfg)
cfg->dump_config_flag = 1;
else if(strcmp(key, "INTERNAL_TIMING") == 0)
cfg->internal_timing_flag = 1;
else if(strcmp(key, "MODMEM_USAGE") == 0)
cfg->mod_mem_usage_flag = 1;
else if(strcmp(key, "DISABLE_SHARED_REDUCTION") == 0)
cfg->disable_shared_redux_flag = 1;
else
Expand Down
1 change: 1 addition & 0 deletions darshan-runtime/lib/darshan-config.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ struct darshan_config
struct dxt_trigger *small_io_trigger;
struct dxt_trigger *unaligned_io_trigger;
int internal_timing_flag;
int mod_mem_usage_flag;
int disable_shared_redux_flag;
int dump_config_flag;
};
Expand Down
51 changes: 44 additions & 7 deletions darshan-runtime/lib/darshan-core.c
Original file line number Diff line number Diff line change
Expand Up @@ -599,6 +599,7 @@ void darshan_core_shutdown(int write_log)
struct darshan_core_module* this_mod = final_core->mod_array[i];
void* mod_buf = NULL;
int mod_buf_sz = 0;
size_t mod_bytes_registered = 0, mod_bytes_allocated = 0;

if(!active_mods[i])
{
Expand All @@ -615,6 +616,8 @@ void darshan_core_shutdown(int write_log)
{
mod_buf = final_core->mod_array[i]->rec_buf_start;
mod_buf_sz = final_core->mod_array[i]->rec_buf_p - mod_buf;
mod_bytes_registered = final_core->mod_array[i]->bytes_registered;
mod_bytes_allocated = final_core->mod_array[i]->bytes_allocated;

#ifdef HAVE_MPI
if(using_mpi)
Expand Down Expand Up @@ -670,6 +673,33 @@ void darshan_core_shutdown(int write_log)
/* error out if unable to write module data */
DARSHAN_CHECK_ERR(ret, "unable to write %s module data to log file %s",
darshan_module_names[i], logfile_name);

#ifdef HAVE_MPI
if(using_mpi)
{
/* reduce to get max per-rank */
if(my_rank == 0)
{
PMPI_Reduce(MPI_IN_PLACE, &mod_bytes_registered, 1, MPI_UNSIGNED,
MPI_MAX, 0, final_core->mpi_comm);
PMPI_Reduce(MPI_IN_PLACE, &mod_bytes_allocated, 1, MPI_UNSIGNED,
MPI_MAX, 0, final_core->mpi_comm);
}
else
{
PMPI_Reduce(&mod_bytes_registered, &mod_bytes_registered, 1, MPI_UNSIGNED,
MPI_MAX, 0, final_core->mpi_comm);
PMPI_Reduce(&mod_bytes_allocated, &mod_bytes_allocated, 1, MPI_UNSIGNED,
MPI_MAX, 0, final_core->mpi_comm);
}
}
#endif

/* print details on module memory usage if requested */
if(my_rank == 0 && final_core->config.mod_mem_usage_flag)
darshan_core_fprintf(stderr,
"# Darshan %s module: bytes_registered=%lu bytes_allocated=%lu\n",
darshan_module_names[i], mod_bytes_registered, mod_bytes_allocated);
}

if(internal_timing_flag)
Expand Down Expand Up @@ -2226,6 +2256,9 @@ static int darshan_core_name_is_excluded(const char *name, darshan_module_id mod
int tmp_index = 0;
struct darshan_core_regex *regex;

if(!name)
return(0);

/* set flag if this module's record names are based on file paths */
name_is_path = 1;
if((mod_id == DARSHAN_APMPI_MOD) || (mod_id == DARSHAN_APXC_MOD) ||
Expand Down Expand Up @@ -2563,6 +2596,7 @@ int darshan_core_register_module(
mod->rec_mem_avail = mod_mem_req;
*inout_rec_count = mod_recs_req;
}
mod->bytes_allocated = mod->rec_mem_avail;

/* register module with darshan */
__darshan_core->mod_array[mod_id] = mod;
Expand Down Expand Up @@ -2636,6 +2670,16 @@ void *darshan_core_register_record(
return(NULL);
}

if(darshan_core_name_is_excluded(name, mod_id))
{
/* do not register record if name matches any exclusion rules */
__DARSHAN_CORE_UNLOCK();
return(NULL);
}

/* hold on to the total number of bytes registered for each module (for DXT we track bytes instead) */
__darshan_core->mod_array[mod_id]->bytes_registered += rec_size;

/* check to see if this module has enough space to store a new record */
if(__darshan_core->mod_array[mod_id]->rec_mem_avail < rec_size)
{
Expand All @@ -2647,13 +2691,6 @@ void *darshan_core_register_record(
/* register a name record if a name is given for this record */
if(name)
{
if(darshan_core_name_is_excluded(name, mod_id))
{
/* do not register record if name matches any exclusion rules */
__DARSHAN_CORE_UNLOCK();
return(NULL);
}

/* check to see if we've already stored the id->name mapping for
* this record, and add a new name record if not
*/
Expand Down
2 changes: 2 additions & 0 deletions darshan-runtime/lib/darshan.h
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,8 @@ struct darshan_core_module
{
/* start of this module's record buffer; at shutdown the core uses it as
 * the base address of the module data to be written
 */
void *rec_buf_start;
/* position within the record buffer; at shutdown the core computes the
 * module's data size as (rec_buf_p - rec_buf_start)
 */
void *rec_buf_p;
/* snapshot of rec_mem_avail taken when the module is registered; reported
 * as "bytes_allocated" when MODMEM_USAGE output is enabled
 */
size_t bytes_allocated;
/* running sum of rec_size over record registration requests that pass the
 * name exclusion check; it is incremented before the available-space
 * check, so it can exceed bytes_allocated
 */
size_t bytes_registered;
/* record memory available to the module; checked against rec_size when a
 * new record is registered
 */
size_t rec_mem_avail;
/* module-supplied function table -- presumably callbacks used by the core;
 * see the definition of darshan_module_funcs to confirm
 */
darshan_module_funcs mod_funcs;
};
Expand Down