From 3c0b66fbe0a141783dde7a80c7fa713623f8935e Mon Sep 17 00:00:00 2001 From: hammad45 Date: Tue, 25 Jul 2023 15:17:14 -0700 Subject: [PATCH 01/10] Added support for stack memory addresses in DXT --- darshan-runtime/lib/darshan-config.c | 12 +++ darshan-runtime/lib/darshan-config.h | 1 + darshan-runtime/lib/darshan-core.c | 5 ++ darshan-runtime/lib/darshan-dxt.c | 24 ++++++ darshan-runtime/lib/darshan-dxt.h | 6 +- darshan-util/darshan-dxt-logutils.c | 73 +++++++++++++++++-- .../pydarshan/darshan/backend/api_def_c.py | 4 + .../pydarshan/darshan/backend/cffi_backend.py | 21 +++++- include/darshan-dxt-log-format.h | 6 +- 9 files changed, 140 insertions(+), 12 deletions(-) diff --git a/darshan-runtime/lib/darshan-config.c b/darshan-runtime/lib/darshan-config.c index 09d6ad51a..3a7ff4b16 100644 --- a/darshan-runtime/lib/darshan-config.c +++ b/darshan-runtime/lib/darshan-config.c @@ -437,6 +437,18 @@ void darshan_parse_config_env(struct darshan_config *cfg) } } } + envstr = getenv("DXT_ENABLE_STACK_TRACE"); + if(envstr) + { + struct dxt_trigger *trigger = malloc(sizeof(*trigger)); + if(trigger) + { + trigger->type = DXT_COLLECT_STACK_TRACE; + trigger->u.unaligned_io.thresh_pct = 0; + cfg->stack_trace_trigger = trigger; + } + } + if(getenv("DARSHAN_DUMP_CONFIG")) cfg->dump_config_flag = 1; if(getenv("DARSHAN_INTERNAL_TIMING")) diff --git a/darshan-runtime/lib/darshan-config.h b/darshan-runtime/lib/darshan-config.h index f3f7c212a..6c6b499a2 100644 --- a/darshan-runtime/lib/darshan-config.h +++ b/darshan-runtime/lib/darshan-config.h @@ -37,6 +37,7 @@ struct darshan_config char *rank_inclusions; struct dxt_trigger *small_io_trigger; struct dxt_trigger *unaligned_io_trigger; + struct dxt_trigger *stack_trace_trigger; int internal_timing_flag; int disable_shared_redux_flag; int dump_config_flag; diff --git a/darshan-runtime/lib/darshan-core.c b/darshan-runtime/lib/darshan-core.c index 1a1bda192..3cfbde8cb 100644 --- a/darshan-runtime/lib/darshan-core.c +++ b/darshan-runtime/lib/darshan-core.c @@ -394,6 +394,10 @@ void darshan_core_initialize(int argc, char **argv) darshan_core_fprintf(stderr, "darshan:init\t%d\t%f\n", nprocs, init_time); } + if(init_core->config.stack_trace_trigger){ + dxt_enable_stack_trace(); + } + return; } @@ -587,6 +591,7 @@ void darshan_core_shutdown(int write_log) if(final_core->config.unaligned_io_trigger) dxt_posix_apply_trace_filter(final_core->config.unaligned_io_trigger); + /* loop over globally used darshan modules and: * - get final output buffer * - compress (zlib) provided output buffer diff --git a/darshan-runtime/lib/darshan-dxt.c b/darshan-runtime/lib/darshan-dxt.c index 38e9c5f24..0011c59b3 100644 --- a/darshan-runtime/lib/darshan-dxt.c +++ b/darshan-runtime/lib/darshan-dxt.c @@ -28,6 +28,7 @@ #include #include #include +#include #include "utlist.h" #include "uthash.h" @@ -58,6 +59,8 @@ typedef int64_t off64_t; /* NOTE: when this size is exceeded, the buffer size is doubled */ #define IO_TRACE_BUF_SIZE 64 +bool isStackTrace = false; + /* The dxt_file_record_ref structure maintains necessary runtime metadata * for the DXT file record (dxt_file_record structure, defined in * darshan-dxt-log-format.h) pointed to by 'file_rec'. This metadata @@ -262,6 +265,10 @@ void dxt_posix_write(darshan_record_id rec_id, int64_t offset, rec_ref->write_traces[file_rec->write_count].length = length; rec_ref->write_traces[file_rec->write_count].start_time = start_time; rec_ref->write_traces[file_rec->write_count].end_time = end_time; + if (isStackTrace) + backtrace (rec_ref->write_traces[file_rec->write_count].stack_trace.address_array, 10); + else + rec_ref->write_traces[file_rec->write_count].stack_trace.noStackTrace = 0; file_rec->write_count += 1; DXT_UNLOCK(); @@ -307,6 +314,10 @@ void dxt_posix_read(darshan_record_id rec_id, int64_t offset, rec_ref->read_traces[file_rec->read_count].length = length; rec_ref->read_traces[file_rec->read_count].start_time = start_time; rec_ref->read_traces[file_rec->read_count].end_time = end_time; + if (isStackTrace) + backtrace (rec_ref->read_traces[file_rec->read_count].stack_trace.address_array , 10); + else + rec_ref->read_traces[file_rec->read_count].stack_trace.noStackTrace = 0; file_rec->read_count += 1; DXT_UNLOCK(); @@ -352,6 +363,10 @@ void dxt_mpiio_write(darshan_record_id rec_id, int64_t offset, rec_ref->write_traces[file_rec->write_count].offset = offset; rec_ref->write_traces[file_rec->write_count].start_time = start_time; rec_ref->write_traces[file_rec->write_count].end_time = end_time; + if (isStackTrace) + backtrace (rec_ref->write_traces[file_rec->write_count].stack_trace.address_array, 10); + else + rec_ref->write_traces[file_rec->write_count].stack_trace.noStackTrace = 0; file_rec->write_count += 1; DXT_UNLOCK(); @@ -397,11 +412,20 @@ void dxt_mpiio_read(darshan_record_id rec_id, int64_t offset, rec_ref->read_traces[file_rec->read_count].offset = offset; rec_ref->read_traces[file_rec->read_count].start_time = start_time; rec_ref->read_traces[file_rec->read_count].end_time = end_time; + if (isStackTrace) + backtrace (rec_ref->read_traces[file_rec->read_count].stack_trace.address_array , 10); + else + rec_ref->read_traces[file_rec->read_count].stack_trace.noStackTrace = 0; file_rec->read_count += 1; DXT_UNLOCK(); } +void dxt_enable_stack_trace () +{ + isStackTrace = true; +} + static void dxt_posix_filter_traces_iterator(void *rec_ref_p, void *user_ptr) { struct dxt_file_record_ref *psx_rec_ref, *mpiio_rec_ref; diff --git a/darshan-runtime/lib/darshan-dxt.h b/darshan-runtime/lib/darshan-dxt.h index a03ce9608..812796b31 100644 --- a/darshan-runtime/lib/darshan-dxt.h +++ b/darshan-runtime/lib/darshan-dxt.h @@ -19,7 +19,8 @@ enum dxt_trigger_type { DXT_SMALL_IO_TRIGGER, - DXT_UNALIGNED_IO_TRIGGER + DXT_UNALIGNED_IO_TRIGGER, + DXT_COLLECT_STACK_TRACE }; struct dxt_trigger { @@ -36,6 +37,7 @@ struct dxt_trigger } u; }; + /* dxt_posix_runtime_initialize() * * DXT function exposed to POSIX module for initializing DXT-POSIX runtime. @@ -72,4 +74,6 @@ void dxt_mpiio_read(darshan_record_id rec_id, int64_t offset, void dxt_posix_apply_trace_filter(struct dxt_trigger *trigger); +void dxt_enable_stack_trace(); + #endif /* __DARSHAN_DXT_H */ diff --git a/darshan-util/darshan-dxt-logutils.c b/darshan-util/darshan-dxt-logutils.c index 28eb50a52..0e526b859 100644 --- a/darshan-util/darshan-dxt-logutils.c +++ b/darshan-util/darshan-dxt-logutils.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "darshan-logutils.h" @@ -300,11 +301,12 @@ void dxt_log_print_posix_file(void *posix_file_rec, char *file_name, int64_t rank = file_rec->base_rec.rank; char *hostname = file_rec->hostname; + int64_t write_count = file_rec->write_count; int64_t read_count = file_rec->read_count; segment_info *io_trace = (segment_info *) ((void *)file_rec + sizeof(struct dxt_file_record)); - + /* Lustre File System */ struct darshan_lustre_record *rec; int lustreFS = !strcmp(fs_type, "lustre"); @@ -313,11 +315,15 @@ void dxt_log_print_posix_file(void *posix_file_rec, char *file_name, int64_t cur_offset; int print_count; int ost_idx; + bool isStackTrace = true; if (!lustre_rec_ref) { lustreFS = 0; } + if (io_trace->stack_trace.noStackTrace==0) + isStackTrace = false; + printf("\n# DXT, file_id: %" PRIu64 ", file_name: %s\n", f_id, file_name); printf("# DXT, rank: %" PRId64 ", hostname: %s\n", rank, hostname); printf("# DXT, write_count: %" PRId64 ", read_count: %" PRId64 "\n", @@ -339,11 +345,14 @@ void dxt_log_print_posix_file(void *posix_file_rec, char *file_name, } /* Print header */ - printf("# Module Rank Wt/Rd Segment Offset Length Start(s) End(s)"); + printf("# Module Rank Wt/Rd Segment Offset Length Start(s) End(s)"); if (lustreFS) { - printf(" [OST]"); + printf(" [OST]"); } + + if (isStackTrace) + printf(" Memory Offsets"); printf("\n"); /* Print IO Traces information */ @@ -361,7 +370,7 @@ void dxt_log_print_posix_file(void *posix_file_rec, char *file_name, print_count = 0; while (cur_offset < offset + length) { - printf(" [%3" PRId64 "]", (rec->ost_ids)[ost_idx]); + printf(" [%3" PRId64 "]", (rec->ost_ids)[ost_idx]); cur_offset = (cur_offset / stripe_size + 1) * stripe_size; ost_idx = (ost_idx == stripe_count - 1) ? 0 : ost_idx + 1; @@ -372,6 +381,15 @@ void dxt_log_print_posix_file(void *posix_file_rec, char *file_name, } } + if (isStackTrace){ + printf(" ["); + for (int j = 0; j < 10; j++) { + printf("%p", io_trace[i].stack_trace.address_array[j]); + if (j != 9) + printf(", "); + } + printf("]"); + } printf("\n"); } @@ -389,7 +407,7 @@ void dxt_log_print_posix_file(void *posix_file_rec, char *file_name, print_count = 0; while (cur_offset < offset + length) { - printf(" [%3" PRId64 "]", (rec->ost_ids)[ost_idx]); + printf(" [%3" PRId64 "]", (rec->ost_ids)[ost_idx]); cur_offset = (cur_offset / stripe_size + 1) * stripe_size; ost_idx = (ost_idx == stripe_count - 1) ? 0 : ost_idx + 1; @@ -400,6 +418,16 @@ void dxt_log_print_posix_file(void *posix_file_rec, char *file_name, } } + if (isStackTrace){ + printf(" ["); + for (int j = 0; j < 10; j++) { + printf("%p", io_trace[i].stack_trace.address_array[j]); + if (j != 9) + printf(", "); + } + printf("]"); + } + printf("\n"); } return; @@ -426,6 +454,10 @@ void dxt_log_print_mpiio_file(void *mpiio_file_rec, char *file_name, segment_info *io_trace = (segment_info *) ((void *)file_rec + sizeof(struct dxt_file_record)); + + bool isStackTrace = true; + if (io_trace[0].stack_trace.noStackTrace == 0) + isStackTrace = false; printf("\n# DXT, file_id: %" PRIu64 ", file_name: %s\n", f_id, file_name); printf("# DXT, rank: %" PRId64 ", hostname: %s\n", rank, hostname); @@ -435,7 +467,10 @@ void dxt_log_print_mpiio_file(void *mpiio_file_rec, char *file_name, printf("# DXT, mnt_pt: %s, fs_type: %s\n", mnt_pt, fs_type); /* Print header */ - printf("# Module Rank Wt/Rd Segment Offset Length Start(s) End(s)\n"); + if (isStackTrace) + printf("# Module Rank Wt/Rd Segment Offset Length Start(s) End(s) Memory Offsets\n"); + else + printf("# Module Rank Wt/Rd Segment Offset Length Start(s) End(s)\n"); /* Print IO Traces information */ for (i = 0; i < write_count; i++) { @@ -444,7 +479,18 @@ void dxt_log_print_mpiio_file(void *mpiio_file_rec, char *file_name, start_time = io_trace[i].start_time; end_time = io_trace[i].end_time; - printf("%8s%8" PRId64 "%7s%9d%16" PRId64 "%16" PRId64 "%12.4f%12.4f\n", "X_MPIIO", rank, "write", i, offset, length, start_time, end_time); + printf("%8s%8" PRId64 "%7s%9d%16" PRId64 "%16" PRId64 "%12.4f%12.4f", "X_MPIIO", rank, "write", i, offset, length, start_time, end_time); + + if (isStackTrace){ + printf(" ["); + for (int j = 0; j < 10; j++) { + printf("%p", io_trace[i].stack_trace.address_array[j]); + if (j != 9) + printf(", "); + } + printf("]"); + } + printf("\n"); } for (i = write_count; i < write_count + read_count; i++) { @@ -453,7 +499,18 @@ void dxt_log_print_mpiio_file(void *mpiio_file_rec, char *file_name, start_time = io_trace[i].start_time; end_time = io_trace[i].end_time; - printf("%8s%8" PRId64 "%7s%9d%16" PRId64 "%16" PRId64 "%12.4f%12.4f\n", "X_MPIIO", rank, "read", (int)(i - write_count), offset, length, start_time, end_time); + printf("%8s%8" PRId64 "%7s%9d%16" PRId64 "%16" PRId64 "%12.4f%12.4f", "X_MPIIO", rank, "read", (int)(i - write_count), offset, length, start_time, end_time); + + if (isStackTrace){ + printf(" ["); + for (int j = 0; j < 10; j++) { + printf("%p", io_trace[i].stack_trace.address_array[j]); + if (j != 9) + printf(", "); + } + printf("]"); + } + printf("\n"); } return; diff --git a/darshan-util/pydarshan/darshan/backend/api_def_c.py b/darshan-util/pydarshan/darshan/backend/api_def_c.py index 50ae9cae3..2c651fa6f 100644 --- a/darshan-util/pydarshan/darshan/backend/api_def_c.py +++ b/darshan-util/pydarshan/darshan/backend/api_def_c.py @@ -177,6 +177,10 @@ int64_t length; double start_time; double end_time; + union { + void *address_array[10]; + int noStackTrace; + } stack_trace; } segment_info; /* counter names */ diff --git a/darshan-util/pydarshan/darshan/backend/cffi_backend.py b/darshan-util/pydarshan/darshan/backend/cffi_backend.py index bd7c338d8..4e3c711b7 100644 --- a/darshan-util/pydarshan/darshan/backend/cffi_backend.py +++ b/darshan-util/pydarshan/darshan/backend/cffi_backend.py @@ -52,7 +52,6 @@ check_version(ffi, libdutil) - _mod_names = [ "NULL", "POSIX", @@ -121,6 +120,7 @@ def log_open(filename): Return: log handle """ + b_fname = filename.encode() handle = libdutil.darshan_log_open(b_fname) log = {"handle": handle, 'modules': None, 'name_records': None} @@ -609,7 +609,6 @@ def log_get_dxt_record(log, mod_name, reads=True, writes=True, dtype='dict'): size_of = ffi.sizeof("struct dxt_file_record") segments = ffi.cast("struct segment_info *", buf[0] + size_of ) - for i in range(wcnt): seg = { "offset": segments[i].offset, @@ -617,6 +616,15 @@ def log_get_dxt_record(log, mod_name, reads=True, writes=True, dtype='dict'): "start_time": segments[i].start_time, "end_time": segments[i].end_time } + seg_array = [] + if not segments[i].stack_trace.noStackTrace == 0: + for j in range(10): + addr = str(segments[i].stack_trace.address_array[j]) + addr = addr.split("'void *' ") + addr = addr[1].split(">") + seg_array.append(addr[0]) + seg["stack_memory_addresses"] = seg_array + rec['write_segments'].append(seg) @@ -628,6 +636,15 @@ def log_get_dxt_record(log, mod_name, reads=True, writes=True, dtype='dict'): "start_time": segments[i].start_time, "end_time": segments[i].end_time } + seg_array = [] + if not segments[i].stack_trace.noStackTrace == 0: + for j in range(10): + addr = str(segments[i].stack_trace.address_array[j]) + addr = addr.split("'void *' ") + addr = addr[1].split(">") + seg_array.append(addr[0]) + seg["stack_memory_addresses"] = seg_array + rec['read_segments'].append(seg) diff --git a/include/darshan-dxt-log-format.h b/include/darshan-dxt-log-format.h index b31fc9928..0afb62dd7 100644 --- a/include/darshan-dxt-log-format.h +++ b/include/darshan-dxt-log-format.h @@ -11,7 +11,7 @@ #define DXT_MPIIO_VER 2 #define HOSTNAME_SIZE 64 - +#include /* * DXT, the segment_info structure maintains detailed Segment IO tracing * information @@ -21,6 +21,10 @@ typedef struct segment_info { int64_t length; double start_time; double end_time; + union { + void *address_array[10]; + int noStackTrace; + } stack_trace; } segment_info; #define X(a) a, From fe6f9a14cec95e7a69ff0a474e216626ca45f6b6 Mon Sep 17 00:00:00 2001 From: hammad45 Date: Thu, 3 Aug 2023 18:25:12 -0700 Subject: [PATCH 02/10] Added address to line mapping for DXT modules --- darshan-runtime/lib/darshan-dxt.c | 236 ++++++++++++++++-- darshan-runtime/lib/darshan.h | 9 + darshan-util/darshan-dxt-logutils.c | 30 +-- darshan-util/darshan-dxt-parser.c | 12 + darshan-util/darshan-logutils.c | 17 +- darshan-util/darshan-logutils.h | 3 +- darshan-util/darshan-parser.c | 1 - .../pydarshan/darshan/backend/api_def_c.py | 7 +- .../pydarshan/darshan/backend/cffi_backend.py | 14 +- include/darshan-dxt-log-format.h | 9 +- include/darshan-log-format.h | 2 + 11 files changed, 288 insertions(+), 52 deletions(-) diff --git a/darshan-runtime/lib/darshan-dxt.c b/darshan-runtime/lib/darshan-dxt.c index 0011c59b3..6fd5a4bbf 100644 --- a/darshan-runtime/lib/darshan-dxt.c +++ b/darshan-runtime/lib/darshan-dxt.c @@ -59,6 +59,8 @@ typedef int64_t off64_t; /* NOTE: when this size is exceeded, the buffer size is doubled */ #define IO_TRACE_BUF_SIZE 64 +#define STACK_TRACE_BUF_SIZE 26 + bool isStackTrace = false; /* The dxt_file_record_ref structure maintains necessary runtime metadata @@ -156,6 +158,9 @@ void dxt_posix_runtime_initialize() }; int ret; + + // set_posix_line_mapping(posix_line_mapping, isStackTrace); + /* register the DXT module with darshan core */ ret = darshan_core_register_module( DXT_POSIX_MOD, @@ -260,15 +265,18 @@ void dxt_posix_write(darshan_record_id rec_id, int64_t offset, DXT_UNLOCK(); return; } - + rec_ref->write_traces[file_rec->write_count].offset = offset; rec_ref->write_traces[file_rec->write_count].length = length; rec_ref->write_traces[file_rec->write_count].start_time = start_time; rec_ref->write_traces[file_rec->write_count].end_time = end_time; - if (isStackTrace) - backtrace (rec_ref->write_traces[file_rec->write_count].stack_trace.address_array, 10); + if (isStackTrace){ + backtrace (rec_ref->write_traces[file_rec->write_count].address_array, STACK_TRACE_BUF_SIZE); + rec_ref->write_traces[file_rec->write_count].noStackTrace = 1; + } else - rec_ref->write_traces[file_rec->write_count].stack_trace.noStackTrace = 0; + rec_ref->write_traces[file_rec->write_count].noStackTrace = 0; + file_rec->write_count += 1; DXT_UNLOCK(); @@ -314,10 +322,12 @@ void dxt_posix_read(darshan_record_id rec_id, int64_t offset, rec_ref->read_traces[file_rec->read_count].length = length; rec_ref->read_traces[file_rec->read_count].start_time = start_time; rec_ref->read_traces[file_rec->read_count].end_time = end_time; - if (isStackTrace) - backtrace (rec_ref->read_traces[file_rec->read_count].stack_trace.address_array , 10); + if (isStackTrace){ + backtrace (rec_ref->read_traces[file_rec->read_count].address_array , STACK_TRACE_BUF_SIZE); + rec_ref->write_traces[file_rec->write_count].noStackTrace = 1; + } else - rec_ref->read_traces[file_rec->read_count].stack_trace.noStackTrace = 0; + rec_ref->read_traces[file_rec->read_count].noStackTrace = 0; file_rec->read_count += 1; DXT_UNLOCK(); @@ -349,7 +359,7 @@ void dxt_mpiio_write(darshan_record_id rec_id, int64_t offset, return; } } - + file_rec = rec_ref->file_rec; check_wr_trace_buf(rec_ref, DXT_MPIIO_MOD, dxt_mpiio_runtime); if(file_rec->write_count == rec_ref->write_available_buf) @@ -358,15 +368,18 @@ void dxt_mpiio_write(darshan_record_id rec_id, int64_t offset, DXT_UNLOCK(); return; } - + rec_ref->write_traces[file_rec->write_count].length = length; rec_ref->write_traces[file_rec->write_count].offset = offset; rec_ref->write_traces[file_rec->write_count].start_time = start_time; rec_ref->write_traces[file_rec->write_count].end_time = end_time; - if (isStackTrace) - backtrace (rec_ref->write_traces[file_rec->write_count].stack_trace.address_array, 10); + if (isStackTrace){ + backtrace (rec_ref->write_traces[file_rec->write_count].address_array, STACK_TRACE_BUF_SIZE); + rec_ref->write_traces[file_rec->write_count].noStackTrace = 1; + } else - rec_ref->write_traces[file_rec->write_count].stack_trace.noStackTrace = 0; + rec_ref->write_traces[file_rec->write_count].noStackTrace = 0; + file_rec->write_count += 1; DXT_UNLOCK(); @@ -407,15 +420,17 @@ void dxt_mpiio_read(darshan_record_id rec_id, int64_t offset, DXT_UNLOCK(); return; } - + rec_ref->read_traces[file_rec->read_count].length = length; rec_ref->read_traces[file_rec->read_count].offset = offset; rec_ref->read_traces[file_rec->read_count].start_time = start_time; rec_ref->read_traces[file_rec->read_count].end_time = end_time; - if (isStackTrace) - backtrace (rec_ref->read_traces[file_rec->read_count].stack_trace.address_array , 10); + if (isStackTrace){ + backtrace (rec_ref->read_traces[file_rec->read_count].address_array , STACK_TRACE_BUF_SIZE); + rec_ref->write_traces[file_rec->write_count].noStackTrace = 1; + } else - rec_ref->read_traces[file_rec->read_count].stack_trace.noStackTrace = 0; + rec_ref->read_traces[file_rec->read_count].noStackTrace = 0; file_rec->read_count += 1; DXT_UNLOCK(); @@ -812,6 +827,7 @@ static void dxt_serialize_posix_records(void *rec_ref_p, void *user_ptr) record_write_count = file_rec->write_count; record_read_count = file_rec->read_count; + if (record_write_count == 0 && record_read_count == 0) return; @@ -829,9 +845,10 @@ static void dxt_serialize_posix_records(void *rec_ref_p, void *user_ptr) memcpy(tmp_buf_ptr, (void *)file_rec, sizeof(struct dxt_file_record)); tmp_buf_ptr = (void *)(tmp_buf_ptr + sizeof(struct dxt_file_record)); - /*Copy write record */ + /*Copy write record */ memcpy(tmp_buf_ptr, (void *)(rec_ref->write_traces), record_write_count * sizeof(segment_info)); + tmp_buf_ptr = (void *)(tmp_buf_ptr + record_write_count * sizeof(segment_info)); @@ -841,6 +858,98 @@ static void dxt_serialize_posix_records(void *rec_ref_p, void *user_ptr) tmp_buf_ptr = (void *)(tmp_buf_ptr + record_read_count * sizeof(segment_info)); + if (isStackTrace){ + + int * unique_memory_addresses; + int size = STACK_TRACE_BUF_SIZE; + unique_memory_addresses = (int*)calloc(size, sizeof(int)); + + int j = 0; + int curr_size = 0; + + for(int i = 0; i < record_write_count * STACK_TRACE_BUF_SIZE; i++){ + int flag = 0; + if (j != STACK_TRACE_BUF_SIZE){ + for(int k = 0; k < curr_size; k++){ + if (unique_memory_addresses[k] == (int )(rec_ref->write_traces->address_array[i])){ + flag = 1; + break; + } + } + if (flag == 0){ + if (curr_size == size){ + size = size * 2; + unique_memory_addresses = realloc(unique_memory_addresses, size * sizeof(int)); + } + unique_memory_addresses[curr_size] = (int )(rec_ref->write_traces->address_array[i]); + curr_size = curr_size + 1; + } + j = j + 1; + } + else{ + j = 0; + i = i + 4; + } + } + + j = 0; + for(int i = 0; i < record_read_count * STACK_TRACE_BUF_SIZE; i++){ + int flag = 0; + if (j != STACK_TRACE_BUF_SIZE){ + for(int k = 0; k < curr_size; k++){ + if (unique_memory_addresses[k] == (int )(rec_ref->read_traces->address_array[i])){ + flag = 1; + break; + } + } + if (flag == 0){ + if (curr_size == size){ + size = size * 2; + unique_memory_addresses = realloc(unique_memory_addresses, size * sizeof(int)); + } + unique_memory_addresses[curr_size] = (int )(rec_ref->read_traces->address_array[i]); + curr_size = curr_size + 1; + } + j = j + 1; + } + else{ + j = 0; + i = i + 4; + } + } + + char * address_line_mapping; + char * address_line_mapping_cur = ""; + + char * exe_name = darshan_exe(); + + for(int i = 0; i < curr_size; i++){ + FILE *FileOpen; + char syscom[256]; + char line[100]; + + if (unique_memory_addresses[i]){ + sprintf(syscom, "addr2line -a %p -e %s", unique_memory_addresses[i], exe_name); + FileOpen = popen(syscom, "r"); + + while (fgets(line, sizeof line, FileOpen)) + { + if (strstr(line, "0x") == NULL && strstr(line, "(nil)") == NULL){ + if (strstr(line, "??") == NULL){ + sprintf(syscom,"%p, %s", unique_memory_addresses[i], line); + address_line_mapping = (char *)calloc(strlen(address_line_mapping_cur) + strlen(syscom) + 1, sizeof(char)); + strcat(address_line_mapping, address_line_mapping_cur); + strcat(address_line_mapping, syscom); + address_line_mapping_cur = address_line_mapping; + } + } + } + } + } + + set_posix_line_mapping(address_line_mapping, isStackTrace); + free(address_line_mapping); + } dxt_posix_runtime->record_buf_size += record_size; } @@ -932,6 +1041,99 @@ static void dxt_serialize_mpiio_records(void *rec_ref_p, void *user_ptr) tmp_buf_ptr = (void *)(tmp_buf_ptr + record_read_count * sizeof(segment_info)); + if (isStackTrace){ + + int * unique_memory_addresses; + int size = STACK_TRACE_BUF_SIZE; + unique_memory_addresses = (int*)calloc(size, sizeof(int)); + + int j = 0; + int curr_size = 0; + + for(int i = 0; i < record_write_count * STACK_TRACE_BUF_SIZE; i++){ + int flag = 0; + if (j != STACK_TRACE_BUF_SIZE){ + for(int k = 0; k < curr_size; k++){ + if (unique_memory_addresses[k] == (int )(rec_ref->write_traces->address_array[i])){ + flag = 1; + break; + } + } + if (flag == 0){ + if (curr_size == size){ + size = size * 2; + unique_memory_addresses = realloc(unique_memory_addresses, size * sizeof(int)); + } + unique_memory_addresses[curr_size] = (int )(rec_ref->write_traces->address_array[i]); + curr_size = curr_size + 1; + } + j = j + 1; + } + else{ + j = 0; + i = i + 4; + } + } + + j = 0; + for(int i = 0; i < record_read_count * STACK_TRACE_BUF_SIZE; i++){ + int flag = 0; + if (j != STACK_TRACE_BUF_SIZE){ + for(int k = 0; k < curr_size; k++){ + if (unique_memory_addresses[k] == (int )(rec_ref->read_traces->address_array[i])){ + flag = 1; + break; + } + } + if (flag == 0){ + if (curr_size == size){ + size = size * 2; + unique_memory_addresses = realloc(unique_memory_addresses, size * sizeof(int)); + } + unique_memory_addresses[curr_size] = (int )(rec_ref->read_traces->address_array[i]); + curr_size = curr_size + 1; + } + j = j + 1; + } + else{ + j = 0; + i = i + 4; + } + } + + char * mpiio_address_line_mapping; + char * mpiio_address_line_mapping_cur = ""; + + char * exe_name = darshan_exe(); + + for(int i = 0; i < curr_size; i++){ + FILE *FileOpen; + char syscom[256]; + char line[100]; + + if (unique_memory_addresses[i]){ + sprintf(syscom, "addr2line -a %p -e %s", unique_memory_addresses[i], exe_name); + FileOpen = popen(syscom, "r"); + + while (fgets(line, sizeof line, FileOpen)) + { + + if (strstr(line, "0x") == NULL && strstr(line, "(nil)") == NULL) { + if (strstr(line, "??") == NULL){ + sprintf(syscom,"%p, %s", unique_memory_addresses[i], line); + mpiio_address_line_mapping = (char *)calloc(strlen(mpiio_address_line_mapping_cur) + strlen(syscom) + 1, sizeof(char)); + strcat(mpiio_address_line_mapping, mpiio_address_line_mapping_cur); + strcat(mpiio_address_line_mapping, syscom); + mpiio_address_line_mapping_cur = mpiio_address_line_mapping; + } + } + } + } + } + + set_mpiio_line_mapping(mpiio_address_line_mapping, isStackTrace); + free(mpiio_address_line_mapping); + } dxt_mpiio_runtime->record_buf_size += record_size; } diff --git a/darshan-runtime/lib/darshan.h b/darshan-runtime/lib/darshan.h index f29c36bdc..2807a0691 100644 --- a/darshan-runtime/lib/darshan.h +++ b/darshan-runtime/lib/darshan.h @@ -314,6 +314,15 @@ int darshan_core_register_module( void darshan_core_unregister_module( darshan_module_id mod_id); + +char *darshan_exe(); + +void set_posix_line_mapping( + char *mapping_array, bool isStackTrace); + +void set_mpiio_line_mapping( + char *mapping_array, bool isStackTrace); + /* darshan_instrument_fs_data() * * Allow file system-specific modules to instrument data for the file diff --git a/darshan-util/darshan-dxt-logutils.c b/darshan-util/darshan-dxt-logutils.c index 0e526b859..a856d919e 100644 --- a/darshan-util/darshan-dxt-logutils.c +++ b/darshan-util/darshan-dxt-logutils.c @@ -23,6 +23,8 @@ #include "darshan-logutils.h" +#define STACK_TRACE_BUF_SIZE 26 + static int dxt_log_get_posix_file(darshan_fd fd, void** dxt_posix_buf_p); static int dxt_log_put_posix_file(darshan_fd fd, void* dxt_posix_buf); @@ -321,7 +323,7 @@ void dxt_log_print_posix_file(void *posix_file_rec, char *file_name, lustreFS = 0; } - if (io_trace->stack_trace.noStackTrace==0) + if (io_trace->noStackTrace==0) isStackTrace = false; printf("\n# DXT, file_id: %" PRIu64 ", file_name: %s\n", f_id, file_name); @@ -383,9 +385,9 @@ void dxt_log_print_posix_file(void *posix_file_rec, char *file_name, if (isStackTrace){ printf(" ["); - for (int j = 0; j < 10; j++) { - printf("%p", io_trace[i].stack_trace.address_array[j]); - if (j != 9) + for (int j = 0; j < STACK_TRACE_BUF_SIZE; j++) { + printf("%p", io_trace[i].address_array[j]); + if (j != STACK_TRACE_BUF_SIZE - 1) printf(", "); } printf("]"); @@ -420,9 +422,9 @@ void dxt_log_print_posix_file(void *posix_file_rec, char *file_name, if (isStackTrace){ printf(" ["); - for (int j = 0; j < 10; j++) { - printf("%p", io_trace[i].stack_trace.address_array[j]); - if (j != 9) + for (int j = 0; j < STACK_TRACE_BUF_SIZE; j++) { + printf("%p", io_trace[i].address_array[j]); + if (j != STACK_TRACE_BUF_SIZE - 1) printf(", "); } printf("]"); @@ -456,7 +458,7 @@ void dxt_log_print_mpiio_file(void *mpiio_file_rec, char *file_name, ((void *)file_rec + sizeof(struct dxt_file_record)); bool isStackTrace = true; - if (io_trace[0].stack_trace.noStackTrace == 0) + if (io_trace[0].noStackTrace == 0) isStackTrace = false; printf("\n# DXT, file_id: %" PRIu64 ", file_name: %s\n", f_id, file_name); @@ -483,9 +485,9 @@ void dxt_log_print_mpiio_file(void *mpiio_file_rec, char *file_name, if (isStackTrace){ printf(" ["); - for (int j = 0; j < 10; j++) { - printf("%p", io_trace[i].stack_trace.address_array[j]); - if (j != 9) + for (int j = 0; j < STACK_TRACE_BUF_SIZE; j++) { + printf("%p", io_trace[i].address_array[j]); + if (j != STACK_TRACE_BUF_SIZE - 1) printf(", "); } printf("]"); @@ -503,9 +505,9 @@ void dxt_log_print_mpiio_file(void *mpiio_file_rec, char *file_name, if (isStackTrace){ printf(" ["); - for (int j = 0; j < 10; j++) { - printf("%p", io_trace[i].stack_trace.address_array[j]); - if (j != 9) + for (int j = 0; j < STACK_TRACE_BUF_SIZE; j++) { + printf("%p", io_trace[i].address_array[j]); + if (j != STACK_TRACE_BUF_SIZE - 1) printf(", "); } printf("]"); diff --git a/darshan-util/darshan-dxt-parser.c b/darshan-util/darshan-dxt-parser.c index 23007b92c..7c203a73a 100644 --- a/darshan-util/darshan-dxt-parser.c +++ b/darshan-util/darshan-dxt-parser.c @@ -138,6 +138,18 @@ int main(int argc, char **argv) printf("# metadata: %s = %s\n", key, value); } + if (strlen(fd->posix_line_mapping) != 0){ + printf("\n# DXT-POSIX address to line mapping\n"); + printf("# -------------------------------------------------------\n"); + printf("%s", fd->posix_line_mapping); + } + + if (strlen(fd->mpiio_line_mapping) != 0){ + printf("\n# DXT-MPIIO address to line mapping\n"); + printf("# -------------------------------------------------------\n"); + printf("%s", fd->mpiio_line_mapping); + } + /* print breakdown of each log file region's contribution to file size */ printf("\n# log file regions\n"); printf("# -------------------------------------------------------\n"); diff --git a/darshan-util/darshan-logutils.c b/darshan-util/darshan-logutils.c index 613d49f5d..964d02441 100644 --- a/darshan-util/darshan-logutils.c +++ b/darshan-util/darshan-logutils.c @@ -1085,7 +1085,7 @@ static int darshan_log_get_header(darshan_fd fd) int log_ver_maj, log_ver_min; int i; int ret; - + ret = darshan_log_seek(fd, 0); if(ret < 0) { @@ -1141,7 +1141,7 @@ static int darshan_log_get_header(darshan_fd fd) /* read uncompressed header from log file */ /* NOTE: header bumped from 16 to 64 modules at log ver 3.41 */ if(((log_ver_maj == 3) && (log_ver_min >= 41)) || (log_ver_maj > 3)) - { + { ret = darshan_log_read(fd, &header, sizeof(header)); if(ret != (int)sizeof(header)) { @@ -1164,6 +1164,8 @@ static int darshan_log_get_header(darshan_fd fd) struct darshan_log_map name_map; struct darshan_log_map mod_map[DARSHAN_MAX_MODS_3_00]; uint32_t mod_ver[DARSHAN_MAX_MODS_3_00]; + char posix_line_mapping[1024]; + char mpiio_line_mapping[1024]; } header_3_00; /* read old header structure */ @@ -1173,7 +1175,6 @@ static int darshan_log_get_header(darshan_fd fd) fprintf(stderr, "Error: failed to read darshan log file header.\n"); return(-1); } - /* set new header structure */ memset(&header, 0, sizeof(header)); strncpy(header.version_string, header_3_00.version_string, 8); @@ -1184,7 +1185,10 @@ static int darshan_log_get_header(darshan_fd fd) (1 + DARSHAN_MAX_MODS_3_00) * sizeof(header_3_00.name_map)); memcpy(&header.mod_ver, &header_3_00.mod_ver, (DARSHAN_MAX_MODS_3_00) * sizeof(header_3_00.mod_ver[0])); - + memcpy(&header.posix_line_mapping, &header_3_00.posix_line_mapping, + strlen(header.posix_line_mapping)); + memcpy(&header.mpiio_line_mapping, &header_3_00.mpiio_line_mapping, + strlen(header.mpiio_line_mapping)); fd->job_map.off = sizeof(header_3_00); } @@ -1265,6 +1269,7 @@ static int darshan_log_get_header(darshan_fd fd) // zero out bits up to (and including) PNETCDF_VAR in shifted flags partial_flag_shift = (partial_flag_shift >> (DARSHAN_PNETCDF_VAR_MOD+1)) << (DARSHAN_PNETCDF_VAR_MOD+1); + // zero out PNETCDF_VAR and all bits higher than it in original flags fd->partial_flag = fd->partial_flag & ((1 << DARSHAN_PNETCDF_VAR_MOD) - 1); // combine original flags and shifted flags @@ -1299,6 +1304,10 @@ static int darshan_log_get_header(darshan_fd fd) fd->job_map.len = fd->name_map.off - fd->job_map.off; } + if (strlen(header.posix_line_mapping) != 0) + memcpy(&fd->posix_line_mapping, &header.posix_line_mapping, strlen(header.posix_line_mapping)); + if (strlen(header.mpiio_line_mapping) != 0) + memcpy(&fd->mpiio_line_mapping, &header.mpiio_line_mapping, strlen(header.mpiio_line_mapping)); return(0); } diff --git a/darshan-util/darshan-logutils.h b/darshan-util/darshan-logutils.h index ee4033e11..d550612f2 100644 --- a/darshan-util/darshan-logutils.h +++ b/darshan-util/darshan-logutils.h @@ -40,7 +40,8 @@ struct darshan_fd_s struct darshan_log_map mod_map[DARSHAN_MAX_MODS]; /* module-specific log-format versions contained in log */ uint32_t mod_ver[DARSHAN_MAX_MODS]; - + char posix_line_mapping[1024]; + char mpiio_line_mapping[1024]; /* KEEP OUT -- remaining state hidden in logutils source */ struct darshan_fd_int_state *state; diff --git a/darshan-util/darshan-parser.c b/darshan-util/darshan-parser.c index 83c97eefc..a2a02fe4a 100644 --- a/darshan-util/darshan-parser.c +++ b/darshan-util/darshan-parser.c @@ -236,7 +236,6 @@ int main(int argc, char **argv) value++; printf("# metadata: %s = %s\n", key, value); } - /* print breakdown of each log file region's contribution to file size */ printf("\n# log file regions\n"); printf("# -------------------------------------------------------\n"); diff --git a/darshan-util/pydarshan/darshan/backend/api_def_c.py b/darshan-util/pydarshan/darshan/backend/api_def_c.py index 2c651fa6f..a6daabba3 100644 --- a/darshan-util/pydarshan/darshan/backend/api_def_c.py +++ b/darshan-util/pydarshan/darshan/backend/api_def_c.py @@ -63,6 +63,7 @@ /* from darshan-log-format.h */ typedef uint64_t darshan_record_id; +#define STACK_TRACE_BUF_SIZE 26 struct darshan_job { @@ -177,10 +178,8 @@ int64_t length; double start_time; double end_time; - union { - void *address_array[10]; - int noStackTrace; - } stack_trace; + void *address_array[STACK_TRACE_BUF_SIZE]; + int noStackTrace; } segment_info; /* counter names */ diff --git a/darshan-util/pydarshan/darshan/backend/cffi_backend.py b/darshan-util/pydarshan/darshan/backend/cffi_backend.py index 4e3c711b7..70924572b 100644 --- a/darshan-util/pydarshan/darshan/backend/cffi_backend.py +++ b/darshan-util/pydarshan/darshan/backend/cffi_backend.py @@ -92,7 +92,7 @@ def mod_name_to_idx(mod_name): "APMPI-PERF": "struct darshan_apmpi_perf_record **", } - +STACK_TRACE_BUF_SIZE = 26 def get_lib_version(): """ @@ -617,9 +617,9 @@ def log_get_dxt_record(log, mod_name, reads=True, writes=True, dtype='dict'): "end_time": segments[i].end_time } seg_array = [] - if not segments[i].stack_trace.noStackTrace == 0: - for j in range(10): - addr = str(segments[i].stack_trace.address_array[j]) + if not segments[i].noStackTrace == 0: + for j in range(STACK_TRACE_BUF_SIZE): + addr = str(segments[i].address_array[j]) addr = addr.split("'void *' ") addr = addr[1].split(">") seg_array.append(addr[0]) @@ -637,9 +637,9 @@ def log_get_dxt_record(log, mod_name, reads=True, writes=True, dtype='dict'): "end_time": segments[i].end_time } seg_array = [] - if not segments[i].stack_trace.noStackTrace == 0: - for j in range(10): - addr = str(segments[i].stack_trace.address_array[j]) + if not segments[i].noStackTrace == 0: + for j in range(STACK_TRACE_BUF_SIZE): + addr = str(segments[i].address_array[j]) addr = addr.split("'void *' ") addr = addr[1].split(">") seg_array.append(addr[0]) diff --git a/include/darshan-dxt-log-format.h b/include/darshan-dxt-log-format.h index 0afb62dd7..5191d7150 100644 --- a/include/darshan-dxt-log-format.h +++ b/include/darshan-dxt-log-format.h @@ -11,6 +11,9 @@ #define DXT_MPIIO_VER 2 #define HOSTNAME_SIZE 64 + +#define STACK_TRACE_BUF_SIZE 26 + #include /* * DXT, the segment_info structure maintains detailed Segment IO tracing @@ -21,10 +24,8 @@ typedef struct segment_info { int64_t length; double start_time; double end_time; - union { - void *address_array[10]; - int noStackTrace; - } stack_trace; + void *address_array[STACK_TRACE_BUF_SIZE]; + int noStackTrace; } segment_info; #define X(a) a, diff --git a/include/darshan-log-format.h b/include/darshan-log-format.h index 4fbf37b53..6ac8a3a0d 100644 --- a/include/darshan-log-format.h +++ b/include/darshan-log-format.h @@ -76,6 +76,8 @@ struct darshan_header struct darshan_log_map name_map; struct darshan_log_map mod_map[DARSHAN_MAX_MODS]; uint32_t mod_ver[DARSHAN_MAX_MODS]; + char posix_line_mapping[1024]; + char mpiio_line_mapping[1024]; }; /* job-level metadata stored for this application */ From 6188db7ce10936d0468978a6360e31409be764b6 Mon Sep 17 00:00:00 2001 From: hammad45 Date: Fri, 25 Aug 2023 14:05:21 -0700 Subject: [PATCH 03/10] Updated darshan-util and pydarshan --- darshan-runtime/lib/darshan-core.c | 64 +++++++++++++++-- darshan-runtime/lib/darshan-dxt.c | 10 ++- darshan-util/darshan-dxt-logutils.c | 46 ++++++++---- .../pydarshan/darshan/backend/api_def_c.py | 43 ++++++++++- .../pydarshan/darshan/backend/cffi_backend.py | 72 +++++++++++++++---- darshan-util/pydarshan/darshan/report.py | 4 ++ include/darshan-dxt-log-format.h | 2 +- 7 files changed, 202 insertions(+), 39 deletions(-) diff --git a/darshan-runtime/lib/darshan-core.c b/darshan-runtime/lib/darshan-core.c index 3cfbde8cb..ed1acc9f2 100644 --- a/darshan-runtime/lib/darshan-core.c +++ b/darshan-runtime/lib/darshan-core.c @@ -71,6 +71,10 @@ static int parent_pid; static struct darshan_core_mnt_data mnt_data_array[DARSHAN_MAX_MNTS]; static int mnt_data_count = 0; +static char *exe_name = ""; +static char *posix_line_mapping = ""; +static char *mpiio_line_mapping = ""; + #ifdef DARSHAN_BGQ extern void bgq_runtime_initialize(); #endif @@ -325,7 +329,6 @@ void darshan_core_initialize(int argc, char **argv) /* collect information about command line and mounted file systems */ darshan_get_exe_and_mounts(init_core, argc, argv); - if(!darshan_should_instrument_app(init_core)) { /* do not instrument excluded applications */ @@ -398,6 +401,14 @@ void darshan_core_initialize(int argc, char **argv) dxt_enable_stack_trace(); } + char *p; + p = strtok(init_core->log_exemnt_p, "\n"); + char *exe; + exe = strtok(p, " "); + + if(exe) + exe_name = exe; + return; } @@ -640,7 +651,7 @@ void darshan_core_shutdown(int write_log) mod_shared_recs[mod_shared_rec_cnt++] = shared_recs[j]; } } - + /* allow the module an opportunity to reduce shared files */ if(this_mod->mod_funcs.mod_redux_func && (mod_shared_rec_cnt > 0)) { @@ -663,6 +674,15 @@ void darshan_core_shutdown(int write_log) this_mod->mod_funcs.mod_output_func(&mod_buf, &mod_buf_sz); } + + for (int i=0; i < strlen(posix_line_mapping); i++){ + final_core->log_hdr_p->posix_line_mapping[i] = posix_line_mapping[i]; + } + + for (int i=0; i < strlen(mpiio_line_mapping); i++){ + final_core->log_hdr_p->mpiio_line_mapping[i] = mpiio_line_mapping[i]; + } + /* append this module's data to the darshan log */ final_core->log_hdr_p->mod_map[i].off = gz_fp; ret = darshan_log_append(log_fh, final_core, mod_buf, mod_buf_sz, &gz_fp); @@ -676,7 +696,7 @@ void darshan_core_shutdown(int write_log) DARSHAN_CHECK_ERR(ret, "unable to write %s module data to log file %s", darshan_module_names[i], logfile_name); } - + if(internal_timing_flag) header1 = darshan_core_wtime_absolute(); ret = darshan_log_write_header(log_fh, final_core); @@ -1004,6 +1024,8 @@ static void add_entry(char* buf, int* space_left, struct mntent* entry) * collects command line and list of mounted file systems into a string that * will be stored with the job-level metadata */ + + static void darshan_get_exe_and_mounts(struct darshan_core_runtime *core, int argc, char **argv) { @@ -2640,7 +2662,6 @@ void *darshan_core_register_record( __DARSHAN_CORE_UNLOCK(); return(NULL); } - /* check to see if this module has enough space to store a new record */ if(__darshan_core->mod_array[mod_id]->rec_mem_avail < rec_size) { @@ -2710,6 +2731,36 @@ void *darshan_core_register_record( return(rec_buf);; } +void set_posix_line_mapping(char *mapping_array, bool isStackTrace){ + + if (isStackTrace){ + posix_line_mapping = (char *)calloc(strlen(mapping_array), sizeof(char)); + + for (int i=0; i < strlen(mapping_array); i++){ + posix_line_mapping[i] = mapping_array[i]; + } + } + else{ + posix_line_mapping = ""; + } + return; +} + +void set_mpiio_line_mapping(char *mapping_array, bool isStackTrace){ + + if (isStackTrace){ + mpiio_line_mapping = (char *)calloc(strlen(mapping_array), sizeof(char)); + + for (int i=0; i < strlen(mapping_array); i++){ + mpiio_line_mapping[i] = mapping_array[i]; + } + } + else{ + mpiio_line_mapping = ""; + } + return; +} + char *darshan_core_lookup_record_name(darshan_record_id rec_id) { struct darshan_core_name_record_ref *ref; @@ -2766,6 +2817,11 @@ void darshan_core_fprintf( return; } +char *darshan_exe() +{ + return exe_name; +} + /* * Local variables: * c-indent-level: 4 diff --git a/darshan-runtime/lib/darshan-dxt.c b/darshan-runtime/lib/darshan-dxt.c index 6fd5a4bbf..b461fe36d 100644 --- a/darshan-runtime/lib/darshan-dxt.c +++ b/darshan-runtime/lib/darshan-dxt.c @@ -59,7 +59,7 @@ typedef int64_t off64_t; /* NOTE: when this size is exceeded, the buffer size is doubled */ #define IO_TRACE_BUF_SIZE 64 -#define STACK_TRACE_BUF_SIZE 26 +#define STACK_TRACE_BUF_SIZE 60 bool isStackTrace = false; @@ -324,7 +324,7 @@ void dxt_posix_read(darshan_record_id rec_id, int64_t offset, rec_ref->read_traces[file_rec->read_count].end_time = end_time; if (isStackTrace){ backtrace (rec_ref->read_traces[file_rec->read_count].address_array , STACK_TRACE_BUF_SIZE); - rec_ref->write_traces[file_rec->write_count].noStackTrace = 1; + rec_ref->read_traces[file_rec->read_count].noStackTrace = 1; } else rec_ref->read_traces[file_rec->read_count].noStackTrace = 0; @@ -427,7 +427,7 @@ void dxt_mpiio_read(darshan_record_id rec_id, int64_t offset, rec_ref->read_traces[file_rec->read_count].end_time = end_time; if (isStackTrace){ backtrace (rec_ref->read_traces[file_rec->read_count].address_array , STACK_TRACE_BUF_SIZE); - rec_ref->write_traces[file_rec->write_count].noStackTrace = 1; + rec_ref->read_traces[file_rec->read_count].noStackTrace = 1; } else rec_ref->read_traces[file_rec->read_count].noStackTrace = 0; @@ -859,7 +859,6 @@ static void dxt_serialize_posix_records(void *rec_ref_p, void *user_ptr) record_read_count * sizeof(segment_info)); if (isStackTrace){ - int * unique_memory_addresses; int size = STACK_TRACE_BUF_SIZE; unique_memory_addresses = (int*)calloc(size, sizeof(int)); @@ -893,7 +892,7 @@ static void dxt_serialize_posix_records(void *rec_ref_p, void *user_ptr) } j = 0; - for(int i = 0; i < record_read_count * STACK_TRACE_BUF_SIZE; i++){ + for(int i = 0; i < record_read_count * STACK_TRACE_BUF_SIZE; i++){ int flag = 0; if (j != STACK_TRACE_BUF_SIZE){ for(int k = 0; k < curr_size; k++){ @@ -1042,7 +1041,6 @@ static void dxt_serialize_mpiio_records(void *rec_ref_p, void *user_ptr) record_read_count * sizeof(segment_info)); if (isStackTrace){ - int * unique_memory_addresses; int size = STACK_TRACE_BUF_SIZE; unique_memory_addresses = (int*)calloc(size, sizeof(int)); diff --git a/darshan-util/darshan-dxt-logutils.c b/darshan-util/darshan-dxt-logutils.c index a856d919e..cd64d0f6e 100644 --- a/darshan-util/darshan-dxt-logutils.c +++ b/darshan-util/darshan-dxt-logutils.c @@ -23,7 +23,7 @@ #include "darshan-logutils.h" -#define STACK_TRACE_BUF_SIZE 26 +#define STACK_TRACE_BUF_SIZE 60 static int dxt_log_get_posix_file(darshan_fd fd, void** dxt_posix_buf_p); static int dxt_log_put_posix_file(darshan_fd fd, void* dxt_posix_buf); @@ -354,7 +354,7 @@ void dxt_log_print_posix_file(void *posix_file_rec, char *file_name, } if (isStackTrace) - printf(" Memory Offsets"); + printf(" Stack Memory Addresses"); printf("\n"); /* Print IO Traces information */ @@ -384,11 +384,15 @@ void dxt_log_print_posix_file(void *posix_file_rec, char *file_name, } if (isStackTrace){ + bool first = true; printf(" ["); for (int j = 0; j < STACK_TRACE_BUF_SIZE; j++) { - printf("%p", io_trace[i].address_array[j]); - if (j != STACK_TRACE_BUF_SIZE - 1) - printf(", "); + if (io_trace[i].address_array[j]){ + if (j != STACK_TRACE_BUF_SIZE - 1 && first == false) + printf(", "); + printf("%p", io_trace[i].address_array[j]); + first = false; + } } printf("]"); } @@ -421,11 +425,15 @@ void dxt_log_print_posix_file(void *posix_file_rec, char *file_name, } if (isStackTrace){ + bool first = true; printf(" ["); for (int j = 0; j < STACK_TRACE_BUF_SIZE; j++) { - printf("%p", io_trace[i].address_array[j]); - if (j != STACK_TRACE_BUF_SIZE - 1) - printf(", "); + if (io_trace[i].address_array[j]){ + if (j != STACK_TRACE_BUF_SIZE - 1 && first == false) + printf(", "); + printf("%p", io_trace[i].address_array[j]); + first = false; + } } printf("]"); } @@ -470,7 +478,7 @@ void dxt_log_print_mpiio_file(void *mpiio_file_rec, char *file_name, /* Print header */ if (isStackTrace) - printf("# Module Rank Wt/Rd Segment Offset Length Start(s) End(s) Memory Offsets\n"); + printf("# Module Rank Wt/Rd Segment Offset Length Start(s) End(s) Stack Memory Addresses\n"); else printf("# Module Rank Wt/Rd Segment Offset Length Start(s) End(s)\n"); @@ -484,11 +492,15 @@ void dxt_log_print_mpiio_file(void *mpiio_file_rec, char *file_name, printf("%8s%8" PRId64 "%7s%9d%16" PRId64 "%16" PRId64 "%12.4f%12.4f", "X_MPIIO", rank, "write", i, offset, length, start_time, end_time); if (isStackTrace){ + bool first = true; printf(" ["); for (int j = 0; j < STACK_TRACE_BUF_SIZE; j++) { - printf("%p", io_trace[i].address_array[j]); - if (j != STACK_TRACE_BUF_SIZE - 1) - printf(", "); + if (io_trace[i].address_array[j]){ + if (j != STACK_TRACE_BUF_SIZE - 1 && first == false) + printf(", "); + printf("%p", io_trace[i].address_array[j]); + first = false; + } } printf("]"); } @@ -504,11 +516,15 @@ void dxt_log_print_mpiio_file(void *mpiio_file_rec, char *file_name, printf("%8s%8" PRId64 "%7s%9d%16" PRId64 "%16" PRId64 "%12.4f%12.4f", "X_MPIIO", rank, "read", (int)(i - write_count), offset, length, start_time, end_time); if (isStackTrace){ + bool first = true; printf(" ["); for (int j = 0; j < STACK_TRACE_BUF_SIZE; j++) { - printf("%p", io_trace[i].address_array[j]); - if (j != STACK_TRACE_BUF_SIZE - 1) - printf(", "); + if (io_trace[i].address_array[j]){ + if (j != STACK_TRACE_BUF_SIZE - 1 && first == false) + printf(", "); + printf("%p", io_trace[i].address_array[j]); + first = false; + } } printf("]"); } diff --git a/darshan-util/pydarshan/darshan/backend/api_def_c.py b/darshan-util/pydarshan/darshan/backend/api_def_c.py index a6daabba3..5082d6435 100644 --- a/darshan-util/pydarshan/darshan/backend/api_def_c.py +++ b/darshan-util/pydarshan/darshan/backend/api_def_c.py @@ -32,6 +32,47 @@ struct darshan_file_category_counters category_counters[7]; }; +#define DARSHAN_MAX_MODS 64 + +struct darshan_log_map +{ + uint64_t off; + uint64_t len; +}; + +struct darshan_fd_int_state; + +/* darshan file descriptor definition */ +struct darshan_fd_s +{ + /* log file version */ + char version[8]; + /* flag indicating whether byte swapping needs to be + * performed on log file data + */ + int swap_flag; + /* bit-field indicating whether modules contain incomplete data */ + uint64_t partial_flag; + /* compression type used on log file */ + enum darshan_comp_type comp_type; + /* log file offset/length maps for each log file region */ + struct darshan_log_map job_map; + struct darshan_log_map name_map; + struct darshan_log_map mod_map[DARSHAN_MAX_MODS]; + /* module-specific log-format versions contained in log */ + uint32_t mod_ver[DARSHAN_MAX_MODS]; + char posix_line_mapping[1024]; + char mpiio_line_mapping[1024]; + /* KEEP OUT -- remaining state hidden in logutils source */ + struct darshan_fd_int_state *state; + + /* workaround to parse logs with slightly inconsistent heatmap bin + * counts as described in https://github.com/darshan-hpc/darshan/issues/941 + */ + int64_t first_heatmap_record_nbins; + double first_heatmap_record_bin_width_seconds; +}; + struct darshan_mnt_info { char mnt_type[3015]; @@ -63,7 +104,7 @@ /* from darshan-log-format.h */ typedef uint64_t darshan_record_id; -#define STACK_TRACE_BUF_SIZE 26 +#define STACK_TRACE_BUF_SIZE 60 struct darshan_job { diff --git a/darshan-util/pydarshan/darshan/backend/cffi_backend.py b/darshan-util/pydarshan/darshan/backend/cffi_backend.py index 70924572b..fa7716a29 100644 --- a/darshan-util/pydarshan/darshan/backend/cffi_backend.py +++ b/darshan-util/pydarshan/darshan/backend/cffi_backend.py @@ -92,7 +92,7 @@ def mod_name_to_idx(mod_name): "APMPI-PERF": "struct darshan_apmpi_perf_record **", } -STACK_TRACE_BUF_SIZE = 26 +STACK_TRACE_BUF_SIZE = 30 def get_lib_version(): """ @@ -583,7 +583,6 @@ def log_get_dxt_record(log, mod_name, reads=True, writes=True, dtype='dict'): return None mod_type = _structdefs[mod_name] #name_records = log_get_name_records(log) - rec = {} buf = ffi.new("void **") r = libdutil.darshan_log_get_record(log['handle'], modules[mod_name]['idx'], buf) @@ -605,7 +604,6 @@ def log_get_dxt_record(log, mod_name, reads=True, writes=True, dtype='dict'): rec['write_segments'] = [] rec['read_segments'] = [] - size_of = ffi.sizeof("struct dxt_file_record") segments = ffi.cast("struct segment_info *", buf[0] + size_of ) @@ -619,12 +617,13 @@ def log_get_dxt_record(log, mod_name, reads=True, writes=True, dtype='dict'): seg_array = [] if not segments[i].noStackTrace == 0: for j in range(STACK_TRACE_BUF_SIZE): - addr = str(segments[i].address_array[j]) - addr = addr.split("'void *' ") - addr = addr[1].split(">") - seg_array.append(addr[0]) + if (segments[i].address_array[j]): + addr = str(segments[i].address_array[j]) + addr = addr.split("'void *' ") + addr = addr[1].split(">") + seg_array.append(addr[0]) seg["stack_memory_addresses"] = seg_array - + rec['write_segments'].append(seg) @@ -639,10 +638,11 @@ def log_get_dxt_record(log, mod_name, reads=True, writes=True, dtype='dict'): seg_array = [] if not segments[i].noStackTrace == 0: for j in range(STACK_TRACE_BUF_SIZE): - addr = str(segments[i].address_array[j]) - addr = addr.split("'void *' ") - addr = addr[1].split(">") - seg_array.append(addr[0]) + if (segments[i].address_array[j]): + addr = str(segments[i].address_array[j]) + addr = addr.split("'void *' ") + addr = addr[1].split(">") + seg_array.append(addr[0]) seg["stack_memory_addresses"] = seg_array rec['read_segments'].append(seg) @@ -652,6 +652,54 @@ def log_get_dxt_record(log, mod_name, reads=True, writes=True, dtype='dict'): rec['read_segments'] = pd.DataFrame(rec['read_segments']) rec['write_segments'] = pd.DataFrame(rec['write_segments']) + size_of = ffi.sizeof("struct darshan_fd_s") + address_line_mapping = ffi.cast("struct darshan_fd_s *", log['handle']) + + if mod_name == 'DXT_POSIX': + rec['posix_address_line_mapping'] = [] + data = ffi.string(address_line_mapping.posix_line_mapping) + data = data.decode('utf-8') + data = data.split('\n') + for item in data: + if item: + item = item.split(",") + address = item[0] + + func_line = item[1] + func_line = func_line.split(":") + function_name = func_line[0] + line_number = func_line[1] + + mapping = { + "address": address, + "function_name": function_name, + "line_number": line_number + } + + rec['posix_address_line_mapping'].append(mapping) + elif mod_name == 'DXT_MPIIO': + rec['mpiio_address_line_mapping'] = [] + data = ffi.string(address_line_mapping.mpiio_line_mapping) + data = data.decode('utf-8') + data = data.split('\n') + for item in data: + if item: + item = item.split(",") + address = item[0] + + func_line = item[1] + func_line = func_line.split(":") + function_name = func_line[0] + line_number = func_line[1] + + mapping = { + "address": address, + "function_name": function_name, + "line_number": line_number + } + + rec['mpiio_address_line_mapping'].append(mapping) + libdutil.darshan_free(buf[0]) return rec diff --git a/darshan-util/pydarshan/darshan/report.py b/darshan-util/pydarshan/darshan/report.py index 047e4d568..9cb2e4c08 100644 --- a/darshan-util/pydarshan/darshan/report.py +++ b/darshan-util/pydarshan/darshan/report.py @@ -268,6 +268,10 @@ def to_df(self, attach="default"): for rec in records: rec['read_segments'] = pd.DataFrame(rec['read_segments']) rec['write_segments'] = pd.DataFrame(rec['write_segments']) + if mod == 'DXT_POSIX': + rec['posix_address_line_mapping'] = pd.DataFrame(rec['posix_address_line_mapping']) + elif mod == 'DXT_MPIIO': + rec['mpiio_address_line_mapping'] = pd.DataFrame(rec['mpiio_address_line_mapping']) else: df_recs = pd.DataFrame.from_records(records) # generic records have counter and fcounter arrays to collect diff --git a/include/darshan-dxt-log-format.h b/include/darshan-dxt-log-format.h index 5191d7150..056283582 100644 --- a/include/darshan-dxt-log-format.h +++ b/include/darshan-dxt-log-format.h @@ -12,7 +12,7 @@ #define HOSTNAME_SIZE 64 -#define STACK_TRACE_BUF_SIZE 26 +#define STACK_TRACE_BUF_SIZE 60 #include /* From 462252eef2b33058f24f4abdb91d8ffe5139ea3e Mon Sep 17 00:00:00 2001 From: hammad45 Date: Tue, 28 Nov 2023 10:27:34 -0800 Subject: [PATCH 04/10] Updated backtrace code --- darshan-runtime/lib/darshan-core.c | 149 ++++++- darshan-runtime/lib/darshan-dxt.c | 378 +++++++++--------- darshan-runtime/lib/darshan-hdf5.c | 1 + darshan-util/darshan-logutils.c | 4 +- darshan-util/darshan-logutils.h | 4 +- .../pydarshan/darshan/backend/api_def_c.py | 4 +- .../pydarshan/darshan/backend/cffi_backend.py | 74 ++-- include/darshan-dxt-log-format.h | 2 + include/darshan-log-format.h | 4 +- 9 files changed, 377 insertions(+), 243 deletions(-) diff --git a/darshan-runtime/lib/darshan-core.c b/darshan-runtime/lib/darshan-core.c index ed1acc9f2..153399a0d 100644 --- a/darshan-runtime/lib/darshan-core.c +++ b/darshan-runtime/lib/darshan-core.c @@ -20,6 +20,7 @@ #endif #include #include +#include #include #include #include @@ -51,6 +52,8 @@ #include #endif +#define STACK_TRACE_BUF_SIZE 60 + extern char* __progname; extern char* __progname_full; struct darshan_core_runtime *__darshan_core = NULL; @@ -72,8 +75,10 @@ static struct darshan_core_mnt_data mnt_data_array[DARSHAN_MAX_MNTS]; static int mnt_data_count = 0; static char *exe_name = ""; +static char *log_path_mappings = NULL; static char *posix_line_mapping = ""; static char *mpiio_line_mapping = ""; +bool processed = false; #ifdef DARSHAN_BGQ extern void bgq_runtime_initialize(); @@ -210,7 +215,6 @@ void darshan_core_initialize(int argc, char **argv) int ret; int i; struct timespec start_ts; - /* setup darshan runtime if darshan is enabled and hasn't been initialized already */ if (__darshan_core != NULL || getenv("DARSHAN_DISABLE")) return; @@ -560,6 +564,12 @@ void darshan_core_shutdown(int write_log) /* get the log file name */ darshan_get_logfile_name(logfile_name, final_core); + log_path_mappings = malloc(__DARSHAN_PATH_MAX); + + for (int i = 0; i < strlen(logfile_name); i++){ + log_path_mappings[i] = logfile_name[i]; + } + if(strlen(logfile_name) == 0) { /* failed to generate log file name */ @@ -674,14 +684,126 @@ void darshan_core_shutdown(int write_log) this_mod->mod_funcs.mod_output_func(&mod_buf, &mod_buf_sz); } - - for (int i=0; i < strlen(posix_line_mapping); i++){ - final_core->log_hdr_p->posix_line_mapping[i] = posix_line_mapping[i]; - } - - for (int i=0; i < strlen(mpiio_line_mapping); i++){ - final_core->log_hdr_p->mpiio_line_mapping[i] = mpiio_line_mapping[i]; +#ifdef HAVE_MPI + if(using_mpi) + { + if(my_rank == 0 && processed == false && final_core->config.stack_trace_trigger) + { + processed = true; + // char posixMappingsPath[1024]; + // getcwd(posixMappingsPath, sizeof(posixMappingsPath)); + // char source[] = "/posix_mappings.txt"; + // strcat(posixMappingsPath, source); + + // char mpiioMappingsPath[1024]; + // getcwd(mpiioMappingsPath, sizeof(mpiioMappingsPath)); + // char source1[] = "/mpiio_mappings.txt"; + // strcat(mpiioMappingsPath, source1); + + // FILE *FileOpen; + // char syscom[256]; + // char line[100]; + + // char sPath[1024] = ""; + // char *pTmp; + + // if (( pTmp =getenv( "LD_PRELOAD" )) != NULL ) + // strncpy( sPath, pTmp, 1024 - 1 ); // Save a copy for our use. + + // char * token = strtok(sPath, "-"); + // char source2[] = "-runtime/lib/script.py"; + // strcat(sPath,source2); + + // sprintf(syscom, "python3 %s %s %s %s", sPath, posixMappingsPath, mpiioMappingsPath, exe_name); + // FileOpen = popen(syscom, "r"); + // while (fgets(line, sizeof line, FileOpen)) + // { + // printf("%s", line); + // } + + + + + // FILE * fp; + // char * line1 = NULL; + // size_t len = 0; + // ssize_t read; + + // fp = fopen(posixMappingsPath, "r"); + // if (fp == NULL) + // exit(EXIT_FAILURE); + + // int ind = 0; + // while ((read = getline(&line1, &len, fp)) != -1) { + // for (int i=0; i < strlen(line1); i++){ + // final_core->log_hdr_p->posix_line_mapping[ind] = line1[i]; + // ind = ind + 1; + // } + // } + + // fp = fopen(mpiioMappingsPath, "r"); + // if (fp == NULL) + // exit(EXIT_FAILURE); + + // ind = 0; + // while ((read = getline(&line1, &len, fp)) != -1) { + // for (int i=0; i < strlen(line1); i++){ + // final_core->log_hdr_p->mpiio_line_mapping[ind] = line1[i]; + // ind = ind + 1; + // } + // } + + char * unique_memory_addresses; + int size = STACK_TRACE_BUF_SIZE; + unique_memory_addresses = (int*)calloc(size, sizeof(char)); + int curr_size = 0; + + FILE *fp; + char * line1 = NULL; + size_t len = 0; + ssize_t read; + + fp = fopen("/tmp/posix_mappings.txt", "r"); + if (fp == NULL) + exit(EXIT_FAILURE); + + // while ((read = getline(&line1, &len, fp)) != -1) { + // int flag = 0; + // if (strstr(line1, exe_name) != NULL) { + // char * token = strtok(line1, "["); + // token = strtok(NULL, "["); + // token = strtok(token, "]"); + // // if (strlen(token) < 16){ + // // // int number = (int)strtol(token, NULL, 16); + + // // // printf("%i\n", number); + // // // for(int k = 0; k < curr_size; k++){ + // // // if (unique_memory_addresses[k] == atoi(token)){ + // // // flag = 1; + // // // break; + // // // } + // // // } + // // // if (flag == 0){ + // // // if (curr_size == size){ + // // // size = size * 2; + // // // unique_memory_addresses = realloc(unique_memory_addresses, size * sizeof(int)); + // // // } + // // // unique_memory_addresses[curr_size] = atoi(token); + // // // curr_size = curr_size + 1; + // // // } + // // } + // } + // } + + // for (int i = 0; i < curr_size; i++){ + // printf("%i\n", unique_memory_addresses[i]); + // } + + // remove(mpiioMappingsPath); + // remove(posixMappingsPath); + } } +#endif /* append this module's data to the darshan log */ final_core->log_hdr_p->mod_map[i].off = gz_fp; @@ -2091,7 +2213,7 @@ void darshan_log_finalize(char *logfile_name, double start_log_time) /* set permissions on log file */ chmod(new_logfile_name, chmod_mode); free(new_logfile_name); - } + } } return; @@ -2822,6 +2944,15 @@ char *darshan_exe() return exe_name; } +void get_log_file_path(char *path) +{ + + for (int i = 0; i < strlen(log_path_mappings); i++){ + path[i] = log_path_mappings[i]; + } + return; +} + /* * Local variables: * c-indent-level: 4 diff --git a/darshan-runtime/lib/darshan-dxt.c b/darshan-runtime/lib/darshan-dxt.c index b461fe36d..7fe777ddc 100644 --- a/darshan-runtime/lib/darshan-dxt.c +++ b/darshan-runtime/lib/darshan-dxt.c @@ -62,7 +62,8 @@ typedef int64_t off64_t; #define STACK_TRACE_BUF_SIZE 60 bool isStackTrace = false; - +char posixMappingsPath[1024]; +char mpiioMappingsPath[1024]; /* The dxt_file_record_ref structure maintains necessary runtime metadata * for the DXT file record (dxt_file_record structure, defined in * darshan-dxt-log-format.h) pointed to by 'file_rec'. This metadata @@ -158,8 +159,9 @@ void dxt_posix_runtime_initialize() }; int ret; - - // set_posix_line_mapping(posix_line_mapping, isStackTrace); + // getcwd(posixMappingsPath, sizeof(posixMappingsPath)); + // char source[] = "/posix_mappings.txt"; + // strcat(posixMappingsPath, source); /* register the DXT module with darshan core */ ret = darshan_core_register_module( @@ -203,6 +205,9 @@ void dxt_mpiio_runtime_initialize() }; int ret; + // getcwd(mpiioMappingsPath, sizeof(mpiioMappingsPath)); + // char source[] = "/mpiio_mappings.txt"; + // strcat(mpiioMappingsPath, source); /* register the DXT module with darshan core */ ret = darshan_core_register_module( DXT_MPIIO_MOD, @@ -265,20 +270,37 @@ void dxt_posix_write(darshan_record_id rec_id, int64_t offset, DXT_UNLOCK(); return; } - rec_ref->write_traces[file_rec->write_count].offset = offset; rec_ref->write_traces[file_rec->write_count].length = length; rec_ref->write_traces[file_rec->write_count].start_time = start_time; rec_ref->write_traces[file_rec->write_count].end_time = end_time; if (isStackTrace){ - backtrace (rec_ref->write_traces[file_rec->write_count].address_array, STACK_TRACE_BUF_SIZE); + int size = backtrace (rec_ref->write_traces[file_rec->write_count].address_array, STACK_TRACE_BUF_SIZE); + for (int i = size; i < STACK_TRACE_BUF_SIZE; i++){ + rec_ref->write_traces[file_rec->write_count].address_array[i] = NULL; + } + + // FILE *fptr; + // fptr = fopen(posixMappingsPath, "a+"); + + // char **strings;; + // strings = backtrace_symbols (rec_ref->write_traces[file_rec->write_count].address_array, size); + // if (strings != NULL) + // { + // for (int j = 0; j < size; j++){ + // if (strings[j] != NULL) + // fprintf(fptr, "%s\n", strings[j]); + // } + // } + + // fclose(fptr); rec_ref->write_traces[file_rec->write_count].noStackTrace = 1; + rec_ref->write_traces[file_rec->write_count].size = size; } else rec_ref->write_traces[file_rec->write_count].noStackTrace = 0; file_rec->write_count += 1; - DXT_UNLOCK(); } @@ -323,13 +345,31 @@ void dxt_posix_read(darshan_record_id rec_id, int64_t offset, rec_ref->read_traces[file_rec->read_count].start_time = start_time; rec_ref->read_traces[file_rec->read_count].end_time = end_time; if (isStackTrace){ - backtrace (rec_ref->read_traces[file_rec->read_count].address_array , STACK_TRACE_BUF_SIZE); + int size = backtrace (rec_ref->read_traces[file_rec->read_count].address_array , STACK_TRACE_BUF_SIZE); + for (int i = size; i < STACK_TRACE_BUF_SIZE; i++){ + rec_ref->read_traces[file_rec->read_count].address_array[i] = NULL; + } + + // FILE *fptr; + // fptr = fopen(posixMappingsPath, "a+"); + + // char **strings;; + // strings = backtrace_symbols (rec_ref->read_traces[file_rec->read_count].address_array, size); + // if (strings != NULL) + // { + // for (int j = 0; j < size; j++){ + // if (strings[j] != NULL) + // fprintf(fptr, "%s\n", strings[j]); + // } + // } + + // fclose(fptr); rec_ref->read_traces[file_rec->read_count].noStackTrace = 1; + rec_ref->read_traces[file_rec->read_count].size = size; } else rec_ref->read_traces[file_rec->read_count].noStackTrace = 0; file_rec->read_count += 1; - DXT_UNLOCK(); } @@ -374,14 +414,31 @@ void dxt_mpiio_write(darshan_record_id rec_id, int64_t offset, rec_ref->write_traces[file_rec->write_count].start_time = start_time; rec_ref->write_traces[file_rec->write_count].end_time = end_time; if (isStackTrace){ - backtrace (rec_ref->write_traces[file_rec->write_count].address_array, STACK_TRACE_BUF_SIZE); + int size = backtrace (rec_ref->write_traces[file_rec->write_count].address_array, STACK_TRACE_BUF_SIZE); + for (int i = size; i < STACK_TRACE_BUF_SIZE; i++){ + rec_ref->write_traces[file_rec->write_count].address_array[i] = NULL; + } + // FILE *fptr; + // fptr = fopen(mpiioMappingsPath, "a+"); + + // char **strings;; + // strings = backtrace_symbols (rec_ref->write_traces[file_rec->write_count].address_array, size); + // if (strings != NULL) + // { + // for (int j = 0; j < size; j++){ + // if (strings[j] != NULL) + // fprintf(fptr, "%s\n", strings[j]); + // } + // } + + // fclose(fptr); rec_ref->write_traces[file_rec->write_count].noStackTrace = 1; + rec_ref->write_traces[file_rec->write_count].size = size; } else rec_ref->write_traces[file_rec->write_count].noStackTrace = 0; file_rec->write_count += 1; - DXT_UNLOCK(); } @@ -426,13 +483,30 @@ void dxt_mpiio_read(darshan_record_id rec_id, int64_t offset, rec_ref->read_traces[file_rec->read_count].start_time = start_time; rec_ref->read_traces[file_rec->read_count].end_time = end_time; if (isStackTrace){ - backtrace (rec_ref->read_traces[file_rec->read_count].address_array , STACK_TRACE_BUF_SIZE); + int size = backtrace (rec_ref->read_traces[file_rec->read_count].address_array , STACK_TRACE_BUF_SIZE); + for (int i = size; i < STACK_TRACE_BUF_SIZE; i++){ + rec_ref->read_traces[file_rec->read_count].address_array[i] = NULL; + } + // FILE *fptr; + // fptr = fopen(mpiioMappingsPath, "a+"); + + // char **strings;; + // strings = backtrace_symbols (rec_ref->read_traces[file_rec->read_count].address_array, size); + // if (strings != NULL) + // { + // for (int j = 0; j < size; j++){ + // if (strings[j] != NULL) + // fprintf(fptr, "%s\n", strings[j]); + // } + // } + + // fclose(fptr); rec_ref->read_traces[file_rec->read_count].noStackTrace = 1; + rec_ref->read_traces[file_rec->read_count].size = size; } else rec_ref->read_traces[file_rec->read_count].noStackTrace = 0; file_rec->read_count += 1; - DXT_UNLOCK(); } @@ -830,7 +904,56 @@ static void dxt_serialize_posix_records(void *rec_ref_p, void *user_ptr) if (record_write_count == 0 && record_read_count == 0) return; + + if (isStackTrace){ + // clock_t start, end; + // double cpu_time_used; + // start = clock(); + + // char *path = malloc(4096); + // get_log_file_path(path); + // char substr[256] = "posix_mappings.txt"; + // char * pch; + // pch=strchr(path,'.'); + + // int ind = strlen(path) - strlen(pch) + 1; + // for (int i = 0; i < strlen(substr); i++){ + // path[ind] = substr[i]; + // ind = ind + 1; + // } + // path[ind] ='\0'; + + + FILE *fptr; + fptr = fopen("/tmp/posix_mappings.txt", "a"); + + for(int i = 0; i < record_write_count; i++){ + char **strings; + int size = rec_ref->write_traces[i].size; + strings = backtrace_symbols (rec_ref->write_traces[i].address_array, size); + if (strings != NULL) + { + for (int j = 0; j < size; j++){ + // printf("%s\n", strings[i]); + fprintf(fptr, "%s\n", strings[j]); + } + } + } + for(int i = 0; i < record_read_count; i++){ + char **strings; + int size = rec_ref->read_traces[i].size; + strings = backtrace_symbols (rec_ref->read_traces[i].address_array, size); + if (strings != NULL) + { + for (int j = 0; j < size; j++){ + // printf("%s\n", strings[i]); + fprintf(fptr, "%s\n", strings[j]); + } + } + } + fclose(fptr); + } /* * Buffer format: * dxt_file_record + write_traces + read_traces @@ -858,97 +981,7 @@ static void dxt_serialize_posix_records(void *rec_ref_p, void *user_ptr) tmp_buf_ptr = (void *)(tmp_buf_ptr + record_read_count * sizeof(segment_info)); - if (isStackTrace){ - int * unique_memory_addresses; - int size = STACK_TRACE_BUF_SIZE; - unique_memory_addresses = (int*)calloc(size, sizeof(int)); - - int j = 0; - int curr_size = 0; - - for(int i = 0; i < record_write_count * STACK_TRACE_BUF_SIZE; i++){ - int flag = 0; - if (j != STACK_TRACE_BUF_SIZE){ - for(int k = 0; k < curr_size; k++){ - if (unique_memory_addresses[k] == (int )(rec_ref->write_traces->address_array[i])){ - flag = 1; - break; - } - } - if (flag == 0){ - if (curr_size == size){ - size = size * 2; - unique_memory_addresses = realloc(unique_memory_addresses, size * sizeof(int)); - } - unique_memory_addresses[curr_size] = (int )(rec_ref->write_traces->address_array[i]); - curr_size = curr_size + 1; - } - j = j + 1; - } - else{ - j = 0; - i = i + 4; - } - } - - j = 0; - for(int i = 0; i < record_read_count * STACK_TRACE_BUF_SIZE; i++){ - int flag = 0; - if (j != STACK_TRACE_BUF_SIZE){ - for(int k = 0; k < curr_size; k++){ - if (unique_memory_addresses[k] == (int )(rec_ref->read_traces->address_array[i])){ - flag = 1; - break; - } - } - if (flag == 0){ - if (curr_size == size){ - size = size * 2; - unique_memory_addresses = realloc(unique_memory_addresses, size * sizeof(int)); - } - unique_memory_addresses[curr_size] = (int )(rec_ref->read_traces->address_array[i]); - curr_size = curr_size + 1; - } - j = j + 1; - } - else{ - j = 0; - i = i + 4; - } - } - - char * address_line_mapping; - char * address_line_mapping_cur = ""; - - char * exe_name = darshan_exe(); - - for(int i = 0; i < curr_size; i++){ - FILE *FileOpen; - char syscom[256]; - char line[100]; - - if (unique_memory_addresses[i]){ - sprintf(syscom, "addr2line -a %p -e %s", unique_memory_addresses[i], exe_name); - FileOpen = popen(syscom, "r"); - - while (fgets(line, sizeof line, FileOpen)) - { - if (strstr(line, "0x") == NULL && strstr(line, "(nil)") == NULL){ - if (strstr(line, "??") == NULL){ - sprintf(syscom,"%p, %s", unique_memory_addresses[i], line); - address_line_mapping = (char *)calloc(strlen(address_line_mapping_cur) + strlen(syscom) + 1, sizeof(char)); - strcat(address_line_mapping, address_line_mapping_cur); - strcat(address_line_mapping, syscom); - address_line_mapping_cur = address_line_mapping; - } - } - } - } - } - - set_posix_line_mapping(address_line_mapping, isStackTrace); - free(address_line_mapping); - } + printf("%i\n", file_rec->base_rec.rank); dxt_posix_runtime->record_buf_size += record_size; } @@ -1013,7 +1046,53 @@ static void dxt_serialize_mpiio_records(void *rec_ref_p, void *user_ptr) record_read_count = file_rec->read_count; if (record_write_count == 0 && record_read_count == 0) return; + + if (isStackTrace){ + // clock_t start, end; + // double cpu_time_used; + // start = clock(); + + // char *path = malloc(4096); + // get_log_file_path(path); + // char substr[256] = "/tmp/mpiio_mappings.txt"; + // char * pch; + // pch=strchr(path,'.'); + + // int ind = strlen(path) - strlen(pch) + 1; + // for (int i = 0; i < strlen(substr); i++){ + // path[ind] = substr[i]; + // ind = ind + 1; + // } + // path[ind] ='\0'; + + FILE *fptr; + fptr = fopen("/tmp/mpiio_mappings.txt", "a"); + + for(int i = 0; i < record_write_count; i++){ + char **strings; + int size = rec_ref->write_traces[i].size; + strings = backtrace_symbols (rec_ref->write_traces[i].address_array, size); + if (strings != NULL) + { + for (int j = 0; j < size; j++){ + fprintf(fptr, "%s\n", strings[j]); + } + } + } + for(int i = 0; i < record_read_count; i++){ + char **strings; + int size = rec_ref->read_traces[i].size; + strings = backtrace_symbols (rec_ref->read_traces[i].address_array, size); + if (strings != NULL) + { + for (int j = 0; j < size; j++){ + fprintf(fptr, "%s\n", strings[j]); + } + } + } + fclose(fptr); + } /* * Buffer format: * dxt_file_record + write_traces + read_traces @@ -1039,99 +1118,6 @@ static void dxt_serialize_mpiio_records(void *rec_ref_p, void *user_ptr) record_read_count * sizeof(segment_info)); tmp_buf_ptr = (void *)(tmp_buf_ptr + record_read_count * sizeof(segment_info)); - - if (isStackTrace){ - int * unique_memory_addresses; - int size = STACK_TRACE_BUF_SIZE; - unique_memory_addresses = (int*)calloc(size, sizeof(int)); - - int j = 0; - int curr_size = 0; - - for(int i = 0; i < record_write_count * STACK_TRACE_BUF_SIZE; i++){ - int flag = 0; - if (j != STACK_TRACE_BUF_SIZE){ - for(int k = 0; k < curr_size; k++){ - if (unique_memory_addresses[k] == (int )(rec_ref->write_traces->address_array[i])){ - flag = 1; - break; - } - } - if (flag == 0){ - if (curr_size == size){ - size = size * 2; - unique_memory_addresses = realloc(unique_memory_addresses, size * sizeof(int)); - } - unique_memory_addresses[curr_size] = (int )(rec_ref->write_traces->address_array[i]); - curr_size = curr_size + 1; - } - j = j + 1; - } - else{ - j = 0; - i = i + 4; - } - } - - j = 0; - for(int i = 0; i < record_read_count * STACK_TRACE_BUF_SIZE; i++){ - int flag = 0; - if (j != STACK_TRACE_BUF_SIZE){ - for(int k = 0; k < curr_size; k++){ - if (unique_memory_addresses[k] == (int )(rec_ref->read_traces->address_array[i])){ - flag = 1; - break; - } - } - if (flag == 0){ - if (curr_size == size){ - size = size * 2; - unique_memory_addresses = realloc(unique_memory_addresses, size * sizeof(int)); - } - unique_memory_addresses[curr_size] = (int )(rec_ref->read_traces->address_array[i]); - curr_size = curr_size + 1; - } - j = j + 1; - } - else{ - j = 0; - i = i + 4; - } - } - - char * mpiio_address_line_mapping; - char * mpiio_address_line_mapping_cur = ""; - - char * exe_name = darshan_exe(); - - for(int i = 0; i < curr_size; i++){ - FILE *FileOpen; - char syscom[256]; - char line[100]; - - if (unique_memory_addresses[i]){ - sprintf(syscom, "addr2line -a %p -e %s", unique_memory_addresses[i], exe_name); - FileOpen = popen(syscom, "r"); - - while (fgets(line, sizeof line, FileOpen)) - { - - if (strstr(line, "0x") == NULL && strstr(line, "(nil)") == NULL) { - if (strstr(line, "??") == NULL){ - sprintf(syscom,"%p, %s", unique_memory_addresses[i], line); - mpiio_address_line_mapping = (char *)calloc(strlen(mpiio_address_line_mapping_cur) + strlen(syscom) + 1, sizeof(char)); - strcat(mpiio_address_line_mapping, mpiio_address_line_mapping_cur); - strcat(mpiio_address_line_mapping, syscom); - mpiio_address_line_mapping_cur = mpiio_address_line_mapping; - } - } - } - } - } - - set_mpiio_line_mapping(mpiio_address_line_mapping, isStackTrace); - free(mpiio_address_line_mapping); - } dxt_mpiio_runtime->record_buf_size += record_size; } diff --git a/darshan-runtime/lib/darshan-hdf5.c b/darshan-runtime/lib/darshan-hdf5.c index a654888bd..868581d80 100644 --- a/darshan-runtime/lib/darshan-hdf5.c +++ b/darshan-runtime/lib/darshan-hdf5.c @@ -57,6 +57,7 @@ DARSHAN_FORWARD_DECL(H5Oopen_by_token, hid_t, (hid_t loc_id, H5O_token_t token)) #endif DARSHAN_FORWARD_DECL(H5Oclose, herr_t, (hid_t object_id)); + /* structure that can track i/o stats for a given HDF5 file record at runtime */ struct hdf5_file_record_ref { diff --git a/darshan-util/darshan-logutils.c b/darshan-util/darshan-logutils.c index 964d02441..3e315db5f 100644 --- a/darshan-util/darshan-logutils.c +++ b/darshan-util/darshan-logutils.c @@ -432,12 +432,14 @@ int darshan_log_get_exe(darshan_fd fd, char *buf) } /* exe string is located before the first line break */ + // printf("%s", state->exe_mnt_data); newline = strchr(state->exe_mnt_data, '\n'); /* copy over the exe string */ if(newline) memcpy(buf, state->exe_mnt_data, (newline - state->exe_mnt_data)); - + else + memcpy(buf, state->exe_mnt_data, strlen(state->exe_mnt_data)); return (0); } diff --git a/darshan-util/darshan-logutils.h b/darshan-util/darshan-logutils.h index d550612f2..031253001 100644 --- a/darshan-util/darshan-logutils.h +++ b/darshan-util/darshan-logutils.h @@ -40,8 +40,8 @@ struct darshan_fd_s struct darshan_log_map mod_map[DARSHAN_MAX_MODS]; /* module-specific log-format versions contained in log */ uint32_t mod_ver[DARSHAN_MAX_MODS]; - char posix_line_mapping[1024]; - char mpiio_line_mapping[1024]; + char posix_line_mapping[4096]; + char mpiio_line_mapping[4096]; /* KEEP OUT -- remaining state hidden in logutils source */ struct darshan_fd_int_state *state; diff --git a/darshan-util/pydarshan/darshan/backend/api_def_c.py b/darshan-util/pydarshan/darshan/backend/api_def_c.py index 5082d6435..46bd33104 100644 --- a/darshan-util/pydarshan/darshan/backend/api_def_c.py +++ b/darshan-util/pydarshan/darshan/backend/api_def_c.py @@ -61,8 +61,8 @@ struct darshan_log_map mod_map[DARSHAN_MAX_MODS]; /* module-specific log-format versions contained in log */ uint32_t mod_ver[DARSHAN_MAX_MODS]; - char posix_line_mapping[1024]; - char mpiio_line_mapping[1024]; + char posix_line_mapping[4096]; + char mpiio_line_mapping[4096]; /* KEEP OUT -- remaining state hidden in logutils source */ struct darshan_fd_int_state *state; diff --git a/darshan-util/pydarshan/darshan/backend/cffi_backend.py b/darshan-util/pydarshan/darshan/backend/cffi_backend.py index fa7716a29..96216f0c2 100644 --- a/darshan-util/pydarshan/darshan/backend/cffi_backend.py +++ b/darshan-util/pydarshan/darshan/backend/cffi_backend.py @@ -52,6 +52,8 @@ check_version(ffi, libdutil) +logfilename = None + _mod_names = [ "NULL", "POSIX", @@ -122,6 +124,7 @@ def log_open(filename): """ b_fname = filename.encode() + logfilename = filename handle = libdutil.darshan_log_open(b_fname) log = {"handle": handle, 'modules': None, 'name_records': None} @@ -577,7 +580,6 @@ def log_get_dxt_record(log, mod_name, reads=True, writes=True, dtype='dict'): """ - modules = log_get_modules(log) if mod_name not in modules: return None @@ -652,6 +654,7 @@ def log_get_dxt_record(log, mod_name, reads=True, writes=True, dtype='dict'): rec['read_segments'] = pd.DataFrame(rec['read_segments']) rec['write_segments'] = pd.DataFrame(rec['write_segments']) + size_of = ffi.sizeof("struct darshan_fd_s") address_line_mapping = ffi.cast("struct darshan_fd_s *", log['handle']) @@ -660,45 +663,54 @@ def log_get_dxt_record(log, mod_name, reads=True, writes=True, dtype='dict'): data = ffi.string(address_line_mapping.posix_line_mapping) data = data.decode('utf-8') data = data.split('\n') + index = 0 + address = "" + function_name = "" for item in data: if item: - item = item.split(",") - address = item[0] - - func_line = item[1] - func_line = func_line.split(":") - function_name = func_line[0] - line_number = func_line[1] - - mapping = { - "address": address, - "function_name": function_name, - "line_number": line_number - } - - rec['posix_address_line_mapping'].append(mapping) + if index == 0: + address = item + index = 1 + elif index == 1: + func_line = item + func_line = func_line.split(":") + function_name = func_line[0] + line_number = func_line[1] + index = 0 + + mapping = { + "address": address, + "function_name": function_name, + "line_number": line_number + } + rec['posix_address_line_mapping'].append(mapping) elif mod_name == 'DXT_MPIIO': rec['mpiio_address_line_mapping'] = [] data = ffi.string(address_line_mapping.mpiio_line_mapping) data = data.decode('utf-8') data = data.split('\n') + index = 0 + address = "" + function_name = "" for item in data: if item: - item = item.split(",") - address = item[0] - - func_line = item[1] - func_line = func_line.split(":") - function_name = func_line[0] - line_number = func_line[1] - - mapping = { - "address": address, - "function_name": function_name, - "line_number": line_number - } - - rec['mpiio_address_line_mapping'].append(mapping) + if index == 0: + address = item + index = 1 + elif index == 1: + func_line = item + func_line = func_line.split(":") + function_name = func_line[0] + line_number = func_line[1] + index = 0 + + mapping = { + "address": address, + "function_name": function_name, + "line_number": line_number + } + + rec['mpiio_address_line_mapping'].append(mapping) libdutil.darshan_free(buf[0]) return rec diff --git a/include/darshan-dxt-log-format.h b/include/darshan-dxt-log-format.h index 056283582..7871c2016 100644 --- a/include/darshan-dxt-log-format.h +++ b/include/darshan-dxt-log-format.h @@ -26,6 +26,8 @@ typedef struct segment_info { double end_time; void *address_array[STACK_TRACE_BUF_SIZE]; int noStackTrace; + int size; + void *address_symbols_array[STACK_TRACE_BUF_SIZE]; } segment_info; #define X(a) a, diff --git a/include/darshan-log-format.h b/include/darshan-log-format.h index 6ac8a3a0d..e3fc9d97e 100644 --- a/include/darshan-log-format.h +++ b/include/darshan-log-format.h @@ -76,8 +76,8 @@ struct darshan_header struct darshan_log_map name_map; struct darshan_log_map mod_map[DARSHAN_MAX_MODS]; uint32_t mod_ver[DARSHAN_MAX_MODS]; - char posix_line_mapping[1024]; - char mpiio_line_mapping[1024]; + char posix_line_mapping[4096]; + char mpiio_line_mapping[4096]; }; /* job-level metadata stored for this application */ From fa3f382d7013040c8a7bf5d89f9e86fdfeeebf37 Mon Sep 17 00:00:00 2001 From: Jean Luca Bez Date: Thu, 21 Sep 2023 12:54:26 -0700 Subject: [PATCH 05/10] applying a few optimizations for POSX --- darshan-runtime/lib/darshan-core.c | 125 ++++++++++++++++++++++------- darshan-runtime/lib/darshan-dxt.c | 103 ++++++++++++++++++------ 2 files changed, 176 insertions(+), 52 deletions(-) diff --git a/darshan-runtime/lib/darshan-core.c b/darshan-runtime/lib/darshan-core.c index 153399a0d..a933e7ed7 100644 --- a/darshan-runtime/lib/darshan-core.c +++ b/darshan-runtime/lib/darshan-core.c @@ -687,42 +687,107 @@ void darshan_core_shutdown(int write_log) #ifdef HAVE_MPI if(using_mpi) { - if(my_rank == 0 && processed == false && final_core->config.stack_trace_trigger) - { - processed = true; - // char posixMappingsPath[1024]; - // getcwd(posixMappingsPath, sizeof(posixMappingsPath)); - // char source[] = "/posix_mappings.txt"; - // strcat(posixMappingsPath, source); + //if(i == DXT_POSIX_MOD && my_rank == 0 && final_core->config.stack_trace_trigger) + if (i == DXT_POSIX_MOD) { + PMPI_Barrier(MPI_COMM_WORLD); + if (my_rank == 0 && final_core->config.stack_trace_trigger) { + //printf("entrou aqui <-----------------\n"); + + FILE *fptr; + + typedef struct { + char address[32]; /* key */ + UT_hash_handle hh; /* makes this structure hashable */ + } unique_stack_struct; + + unique_stack_struct *unique_mem_addr = NULL; + + for (int rank = 0; rank < nprocs; rank++) { + char stack_file_name[50]; + sprintf(stack_file_name, ".%d.darshan-posix", rank); + //printf("opening: %s\n", stack_file_name); + fptr = fopen(stack_file_name, "r"); + if (fptr) { + char line[32]; + + while (fgets(line, sizeof(line), fptr)) { + line[strcspn(line, "\n")] = 0; + //printf("line: %s\n", line); + unique_stack_struct *d = NULL; + + HASH_FIND_STR(unique_mem_addr, line, d); + + if (!d) { + //printf("not found\n"); + unique_stack_struct *e = (unique_stack_struct *) malloc(sizeof *e); + strcpy(e->address, line); + + HASH_ADD_STR(unique_mem_addr, address, e); + } + } - // char mpiioMappingsPath[1024]; - // getcwd(mpiioMappingsPath, sizeof(mpiioMappingsPath)); - // char source1[] = "/mpiio_mappings.txt"; - // strcat(mpiioMappingsPath, source1); + //printf("complete\n"); - // FILE *FileOpen; - // char syscom[256]; - // char line[100]; + fclose(fptr); - // char sPath[1024] = ""; - // char *pTmp; + remove(stack_file_name); + } else { + printf("unable to open the file\n"); + } + } - // if (( pTmp =getenv( "LD_PRELOAD" )) != NULL ) - // strncpy( sPath, pTmp, 1024 - 1 ); // Save a copy for our use. + unique_stack_struct *d = NULL; - // char * token = strtok(sPath, "-"); - // char source2[] = "-runtime/lib/script.py"; - // strcat(sPath,source2); + char * exe_name = darshan_exe(); - // sprintf(syscom, "python3 %s %s %s %s", sPath, posixMappingsPath, mpiioMappingsPath, exe_name); - // FileOpen = popen(syscom, "r"); - // while (fgets(line, sizeof line, FileOpen)) - // { - // printf("%s", line); - // } + int line_mappings_index = 0; + + //char * address_line_mapping = NULL; + char address_line_mapping[4096] = {}; + //char * address_line_mapping_cur = ""; + for (d = unique_mem_addr; d != NULL; d = (unique_stack_struct *)(d->hh.next)) { + //printf("global_unique -> %s\n", d->address); + FILE *fp; + char cmd[256]; + char *line = NULL; + size_t len = 0; + sprintf(cmd, "addr2line -a %s -e %s", d->address, exe_name); + //printf("CMD: %s", cmd); + fp = popen(cmd, "r"); + + while (getline(&line, &len, fp) != -1) + //while (fgets(line, sizeof line, fp)) + { + //printf("--> [%s]\n", line); + if (strstr(line, "0x") == NULL && strstr(line, "(nil)") == NULL && strstr(line, "(null)") == NULL) { + if (strstr(line, "??") == NULL) { + sprintf(cmd, "%p, %s", d->address, line); + + //address_line_mapping = (char *)calloc(strlen(address_line_mapping) + strlen(cmd) + 1, sizeof(char)); + //strcat(address_line_mapping, address_line_mapping_cur); + strcat(address_line_mapping, cmd); + //address_line_mapping_cur = address_line_mapping; + + //funcionou + //printf("~~~~ <%s>\n", line); + //sprintf(&final_core->log_hdr_p->posix_line_mapping[line_mappings_index++], "%s, %s", d->address, line); + + //final_core->log_hdr_p->posix_line_mapping[line_mappings_index++] = *address_line_mapping; + } + } + } + + HASH_DEL(unique_mem_addr, d); + } + + strcpy(final_core->log_hdr_p->posix_line_mapping, address_line_mapping); + + //free(address_line_mapping); + + //free(exe_name); // FILE * fp; // char * line1 = NULL; @@ -753,7 +818,7 @@ void darshan_core_shutdown(int write_log) // } // } - char * unique_memory_addresses; + /*char * unique_memory_addresses; int size = STACK_TRACE_BUF_SIZE; unique_memory_addresses = (int*)calloc(size, sizeof(char)); int curr_size = 0; @@ -765,7 +830,7 @@ void darshan_core_shutdown(int write_log) fp = fopen("/tmp/posix_mappings.txt", "r"); if (fp == NULL) - exit(EXIT_FAILURE); + exit(EXIT_FAILURE);*/ // while ((read = getline(&line1, &len, fp)) != -1) { // int flag = 0; @@ -802,6 +867,7 @@ void darshan_core_shutdown(int write_log) // remove(mpiioMappingsPath); // remove(posixMappingsPath); } + } } #endif @@ -2961,3 +3027,4 @@ void get_log_file_path(char *path) * * vim: ts=8 sts=4 sw=4 expandtab */ + diff --git a/darshan-runtime/lib/darshan-dxt.c b/darshan-runtime/lib/darshan-dxt.c index 7fe777ddc..81314514e 100644 --- a/darshan-runtime/lib/darshan-dxt.c +++ b/darshan-runtime/lib/darshan-dxt.c @@ -276,10 +276,23 @@ void dxt_posix_write(darshan_record_id rec_id, int64_t offset, rec_ref->write_traces[file_rec->write_count].end_time = end_time; if (isStackTrace){ int size = backtrace (rec_ref->write_traces[file_rec->write_count].address_array, STACK_TRACE_BUF_SIZE); - for (int i = size; i < STACK_TRACE_BUF_SIZE; i++){ - rec_ref->write_traces[file_rec->write_count].address_array[i] = NULL; - } - + // JL I believe we can remove this since the symbols will help remove whatever is not from the application + //for (int i = size; i < STACK_TRACE_BUF_SIZE; i++){ + // rec_ref->write_traces[file_rec->write_count].address_array[i] = NULL; + //} + + /*char **strings; + strings = backtrace_symbols(rec_ref->write_traces[file_rec->write_count].address_array, size); + + if (strings != NULL) + for(int i = 0; i < size; i++) { + printf("%d: %p %s\n", + i, + (int)rec_ref->write_traces[file_rec->write_count].address_array[i], + strings[i] + ); + } + */ // FILE *fptr; // fptr = fopen(posixMappingsPath, "a+"); @@ -346,9 +359,10 @@ void dxt_posix_read(darshan_record_id rec_id, int64_t offset, rec_ref->read_traces[file_rec->read_count].end_time = end_time; if (isStackTrace){ int size = backtrace (rec_ref->read_traces[file_rec->read_count].address_array , STACK_TRACE_BUF_SIZE); - for (int i = size; i < STACK_TRACE_BUF_SIZE; i++){ - rec_ref->read_traces[file_rec->read_count].address_array[i] = NULL; - } + // JL removed as we can have the symbols do this for us + //for (int i = size; i < STACK_TRACE_BUF_SIZE; i++){ + // rec_ref->read_traces[file_rec->read_count].address_array[i] = NULL; + //} // FILE *fptr; // fptr = fopen(posixMappingsPath, "a+"); @@ -415,9 +429,9 @@ void dxt_mpiio_write(darshan_record_id rec_id, int64_t offset, rec_ref->write_traces[file_rec->write_count].end_time = end_time; if (isStackTrace){ int size = backtrace (rec_ref->write_traces[file_rec->write_count].address_array, STACK_TRACE_BUF_SIZE); - for (int i = size; i < STACK_TRACE_BUF_SIZE; i++){ - rec_ref->write_traces[file_rec->write_count].address_array[i] = NULL; - } + //for (int i = size; i < STACK_TRACE_BUF_SIZE; i++){ + // rec_ref->write_traces[file_rec->write_count].address_array[i] = NULL; + //} // FILE *fptr; // fptr = fopen(mpiioMappingsPath, "a+"); @@ -484,9 +498,9 @@ void dxt_mpiio_read(darshan_record_id rec_id, int64_t offset, rec_ref->read_traces[file_rec->read_count].end_time = end_time; if (isStackTrace){ int size = backtrace (rec_ref->read_traces[file_rec->read_count].address_array , STACK_TRACE_BUF_SIZE); - for (int i = size; i < STACK_TRACE_BUF_SIZE; i++){ - rec_ref->read_traces[file_rec->read_count].address_array[i] = NULL; - } + //for (int i = size; i < STACK_TRACE_BUF_SIZE; i++){ + // rec_ref->read_traces[file_rec->read_count].address_array[i] = NULL; + //} // FILE *fptr; // fptr = fopen(mpiioMappingsPath, "a+"); @@ -923,21 +937,23 @@ static void dxt_serialize_posix_records(void *rec_ref_p, void *user_ptr) // } // path[ind] ='\0'; - + char stack_file_name[50]; + sprintf(stack_file_name, ".%d.darshan-posix", dxt_my_rank); + FILE *fptr; - fptr = fopen("/tmp/posix_mappings.txt", "a"); - - for(int i = 0; i < record_write_count; i++){ + fptr = fopen(stack_file_name, "w"); + + /*for(int i = 0; i < record_write_count; i++){ char **strings; int size = rec_ref->write_traces[i].size; strings = backtrace_symbols (rec_ref->write_traces[i].address_array, size); if (strings != NULL) { for (int j = 0; j < size; j++){ - // printf("%s\n", strings[i]); - fprintf(fptr, "%s\n", strings[j]); + fwrite(fptr, "%s\n", strings[j]); } } + free(strings); } for(int i = 0; i < record_read_count; i++){ @@ -951,7 +967,47 @@ static void dxt_serialize_posix_records(void *rec_ref_p, void *user_ptr) fprintf(fptr, "%s\n", strings[j]); } } + free(strings); + }*/ + + typedef struct { + void *address; /* key */ + UT_hash_handle hh; /* makes this structure hashable */ + } stack_struct; + + stack_struct *unique_mem_addr = NULL; + + for(int i = 0; i < record_write_count; i++){ + int size = rec_ref->write_traces[i].size; + + for (int j = 0; j < size; j++) { + stack_struct *d = NULL; + + void *addr = rec_ref->write_traces[i].address_array[j]; + // printf("looking for %p\n", addr); + HASH_FIND_PTR(unique_mem_addr, &addr, d); + + if (!d) { + //printf("not found\n"); + stack_struct *e = (stack_struct *) malloc(sizeof *e); + + e->address = addr; + + HASH_ADD_PTR(unique_mem_addr, address, e); + } + } } + + stack_struct *d = NULL; + + for (d = unique_mem_addr; d != NULL; d = (stack_struct *)(d->hh.next)) { + //printf("unique-> %p\n", d->address); + fprintf(fptr, "%p\n", d->address); + + HASH_DEL(unique_mem_addr, d); + } + + //fflush(fptr); fclose(fptr); } /* @@ -981,7 +1037,7 @@ static void dxt_serialize_posix_records(void *rec_ref_p, void *user_ptr) tmp_buf_ptr = (void *)(tmp_buf_ptr + record_read_count * sizeof(segment_info)); - printf("%i\n", file_rec->base_rec.rank); + //printf("%i\n", file_rec->base_rec.rank); dxt_posix_runtime->record_buf_size += record_size; } @@ -1065,7 +1121,7 @@ static void dxt_serialize_mpiio_records(void *rec_ref_p, void *user_ptr) // } // path[ind] ='\0'; - FILE *fptr; + /*FILE *fptr; fptr = fopen("/tmp/mpiio_mappings.txt", "a"); for(int i = 0; i < record_write_count; i++){ @@ -1090,8 +1146,8 @@ static void dxt_serialize_mpiio_records(void *rec_ref_p, void *user_ptr) fprintf(fptr, "%s\n", strings[j]); } } - } - fclose(fptr); + }*/ + //fclose(fptr); } /* * Buffer format: @@ -1173,3 +1229,4 @@ static void dxt_mpiio_cleanup() * * vim: ts=8 sts=4 sw=4 expandtab */ + From fa13927b6d7ced11da6a6c82807b40d07cbba0cf Mon Sep 17 00:00:00 2001 From: hammad45 Date: Fri, 22 Sep 2023 14:16:31 -0700 Subject: [PATCH 06/10] Optimized code for getting address mappings --- darshan-runtime/lib/darshan-core.c | 367 ++++++++---------- darshan-runtime/lib/darshan-dxt.c | 275 +++++-------- .../pydarshan/darshan/backend/api_def_c.py | 1 + .../pydarshan/darshan/backend/cffi_backend.py | 68 ++-- darshan-util/pydarshan/darshan/report.py | 4 +- include/darshan-dxt-log-format.h | 1 - 6 files changed, 310 insertions(+), 406 deletions(-) diff --git a/darshan-runtime/lib/darshan-core.c b/darshan-runtime/lib/darshan-core.c index a933e7ed7..a350b6a96 100644 --- a/darshan-runtime/lib/darshan-core.c +++ b/darshan-runtime/lib/darshan-core.c @@ -31,13 +31,15 @@ #include #include #include +#include #include #include #include #include #include +#include -#ifdef HAVE_MPI +#ifdef HAVE_/global/homes/h/hather/darshan/darshan-runtime/install/libMPI #include #endif @@ -75,10 +77,6 @@ static struct darshan_core_mnt_data mnt_data_array[DARSHAN_MAX_MNTS]; static int mnt_data_count = 0; static char *exe_name = ""; -static char *log_path_mappings = NULL; -static char *posix_line_mapping = ""; -static char *mpiio_line_mapping = ""; -bool processed = false; #ifdef DARSHAN_BGQ extern void bgq_runtime_initialize(); @@ -564,11 +562,6 @@ void darshan_core_shutdown(int write_log) /* get the log file name */ darshan_get_logfile_name(logfile_name, final_core); - log_path_mappings = malloc(__DARSHAN_PATH_MAX); - - for (int i = 0; i < strlen(logfile_name); i++){ - log_path_mappings[i] = logfile_name[i]; - } if(strlen(logfile_name) == 0) { @@ -691,182 +684,197 @@ void darshan_core_shutdown(int write_log) if (i == DXT_POSIX_MOD) { PMPI_Barrier(MPI_COMM_WORLD); if (my_rank == 0 && final_core->config.stack_trace_trigger) { - //printf("entrou aqui <-----------------\n"); - - FILE *fptr; + FILE *fptr; - typedef struct { - char address[32]; /* key */ - UT_hash_handle hh; /* makes this structure hashable */ - } unique_stack_struct; + typedef struct { + char address[32]; /* key */ + UT_hash_handle hh; /* makes this structure hashable */ + } unique_stack_struct; - unique_stack_struct *unique_mem_addr = NULL; + unique_stack_struct *unique_mem_addr = NULL; - for (int rank = 0; rank < nprocs; rank++) { - char stack_file_name[50]; - sprintf(stack_file_name, ".%d.darshan-posix", rank); - //printf("opening: %s\n", stack_file_name); - fptr = fopen(stack_file_name, "r"); - if (fptr) { - char line[32]; + for (int rank = 0; rank < nprocs; rank++) { + char stack_file_name_posix[50]; + sprintf(stack_file_name_posix, ".%d.darshan-posix", rank); + fptr = fopen(stack_file_name_posix, "r"); + if (fptr) { + char line[32]; - while (fgets(line, sizeof(line), fptr)) { - line[strcspn(line, "\n")] = 0; - //printf("line: %s\n", line); - unique_stack_struct *d = NULL; + while (fgets(line, sizeof(line), fptr)) { + line[strcspn(line, "\n")] = 0; + unique_stack_struct *d = NULL; - HASH_FIND_STR(unique_mem_addr, line, d); + HASH_FIND_STR(unique_mem_addr, line, d); - if (!d) { - //printf("not found\n"); - unique_stack_struct *e = (unique_stack_struct *) malloc(sizeof *e); - strcpy(e->address, line); + if (!d) { + unique_stack_struct *e = (unique_stack_struct *) malloc(sizeof *e); + strcpy(e->address, line); - HASH_ADD_STR(unique_mem_addr, address, e); + HASH_ADD_STR(unique_mem_addr, address, e); + } } + + fclose(fptr); + remove(stack_file_name_posix); + } else { + printf("unable to open POSIX file\n"); } + } - //printf("complete\n"); + unique_stack_struct *d = NULL; + char * exe_name = darshan_exe(); + int line_mappings_index = 0; + char address_line_mapping[4096] = {}; - fclose(fptr); + for (d = unique_mem_addr; d != NULL; d = (unique_stack_struct *)(d->hh.next)) { + FILE *fp; + char cmd[256]; + char *line = NULL; + size_t len = 0; - remove(stack_file_name); - } else { - printf("unable to open the file\n"); + // sprintf(cmd, "addr2line -a %s -e %s", d->address, exe_name); + char addr[32]; + sprintf(addr, "%s", d->address); + + char *const args[] = { "/usr/bin/addr2line", "-a", addr, "-e", exe_name, NULL }; + + int pipe_fd[2]; + pid_t child_pid; + int status; + + // Create a pipe to capture the command's output + if (pipe(pipe_fd) == -1) { + perror("pipe"); + // return 1; + } + int ret; + // Use posix_spawn to execute the command + posix_spawn_file_actions_t action; + posix_spawn_file_actions_init(&action); + posix_spawn_file_actions_addclose(&action, pipe_fd[0]); // Close the read end of the pipe + posix_spawn_file_actions_adddup2(&action, pipe_fd[1], STDOUT_FILENO); // Redirect stdout to the write end of the pipe + if (posix_spawn(&child_pid, "/usr/bin/addr2line", &action, NULL, args, NULL) == 0) { + + // Close the write end of the pipe in the parent process + close(pipe_fd[1]); + + // Read the output from the pipe + char buffer[4096]; + ssize_t bytes_read; + while ((bytes_read = read(pipe_fd[0], buffer, sizeof(buffer))) > 0) { + fwrite(buffer, 1, bytes_read, stdout); + } + + char * token = strtok(buffer, "\n"); + token = strtok(NULL, "\n"); + sprintf(cmd, "%s, %s\n", buffer, token); + strcat(address_line_mapping, cmd); + } + HASH_DEL(unique_mem_addr, d); } + + strcpy(final_core->log_hdr_p->posix_line_mapping, address_line_mapping); } + } + else if (i == DXT_MPIIO_MOD) { + PMPI_Barrier(MPI_COMM_WORLD); + if (my_rank == 0 && final_core->config.stack_trace_trigger) { + FILE *fptr; - unique_stack_struct *d = NULL; + typedef struct { + char address[32]; /* key */ + UT_hash_handle hh; /* makes this structure hashable */ + } unique_stack_struct; - char * exe_name = darshan_exe(); + unique_stack_struct *unique_mem_addr = NULL; - int line_mappings_index = 0; - - //char * address_line_mapping = NULL; - char address_line_mapping[4096] = {}; - //char * address_line_mapping_cur = ""; + for (int rank = 0; rank < nprocs; rank++) { + char stack_file_name_mpiio[50]; + sprintf(stack_file_name_mpiio, ".%d.darshan-mpiio", rank); + fptr = fopen(stack_file_name_mpiio, "r"); + if (fptr) { + char line[32]; - for (d = unique_mem_addr; d != NULL; d = (unique_stack_struct *)(d->hh.next)) { - //printf("global_unique -> %s\n", d->address); + while (fgets(line, sizeof(line), fptr)) { + line[strcspn(line, "\n")] = 0; + unique_stack_struct *d = NULL; - FILE *fp; - char cmd[256]; - char *line = NULL; - size_t len = 0; + HASH_FIND_STR(unique_mem_addr, line, d); - sprintf(cmd, "addr2line -a %s -e %s", d->address, exe_name); - //printf("CMD: %s", cmd); - fp = popen(cmd, "r"); + if (!d) { + unique_stack_struct *e = (unique_stack_struct *) malloc(sizeof *e); + strcpy(e->address, line); - while (getline(&line, &len, fp) != -1) - //while (fgets(line, sizeof line, fp)) - { - //printf("--> [%s]\n", line); - if (strstr(line, "0x") == NULL && strstr(line, "(nil)") == NULL && strstr(line, "(null)") == NULL) { - if (strstr(line, "??") == NULL) { - sprintf(cmd, "%p, %s", d->address, line); - - //address_line_mapping = (char *)calloc(strlen(address_line_mapping) + strlen(cmd) + 1, sizeof(char)); - //strcat(address_line_mapping, address_line_mapping_cur); - strcat(address_line_mapping, cmd); - //address_line_mapping_cur = address_line_mapping; - - //funcionou - //printf("~~~~ <%s>\n", line); - //sprintf(&final_core->log_hdr_p->posix_line_mapping[line_mappings_index++], "%s, %s", d->address, line); - - //final_core->log_hdr_p->posix_line_mapping[line_mappings_index++] = *address_line_mapping; + HASH_ADD_STR(unique_mem_addr, address, e); + } } + + fclose(fptr); + remove(stack_file_name_mpiio); + } else { + printf("unable to open MPIIO file\n"); } } - HASH_DEL(unique_mem_addr, d); - } + unique_stack_struct *d = NULL; - strcpy(final_core->log_hdr_p->posix_line_mapping, address_line_mapping); - - //free(address_line_mapping); - - //free(exe_name); - - // FILE * fp; - // char * line1 = NULL; - // size_t len = 0; - // ssize_t read; - - // fp = fopen(posixMappingsPath, "r"); - // if (fp == NULL) - // exit(EXIT_FAILURE); - - // int ind = 0; - // while ((read = getline(&line1, &len, fp)) != -1) { - // for (int i=0; i < strlen(line1); i++){ - // final_core->log_hdr_p->posix_line_mapping[ind] = line1[i]; - // ind = ind + 1; - // } - // } - - // fp = fopen(mpiioMappingsPath, "r"); - // if (fp == NULL) - // exit(EXIT_FAILURE); - - // ind = 0; - // while ((read = getline(&line1, &len, fp)) != -1) { - // for (int i=0; i < strlen(line1); i++){ - // final_core->log_hdr_p->mpiio_line_mapping[ind] = line1[i]; - // ind = ind + 1; - // } - // } - - /*char * unique_memory_addresses; - int size = STACK_TRACE_BUF_SIZE; - unique_memory_addresses = (int*)calloc(size, sizeof(char)); - int curr_size = 0; - - FILE *fp; - char * line1 = NULL; - size_t len = 0; - ssize_t read; - - fp = fopen("/tmp/posix_mappings.txt", "r"); - if (fp == NULL) - exit(EXIT_FAILURE);*/ - - // while ((read = getline(&line1, &len, fp)) != -1) { - // int flag = 0; - // if (strstr(line1, exe_name) != NULL) { - // char * token = strtok(line1, "["); - // token = strtok(NULL, "["); - // token = strtok(token, "]"); - // // if (strlen(token) < 16){ - // // // int number = (int)strtol(token, NULL, 16); - - // // // printf("%i\n", number); - // // // for(int k = 0; k < curr_size; k++){ - // // // if (unique_memory_addresses[k] == atoi(token)){ - // // // flag = 1; - // // // break; - // // // } - // // // } - // // // if (flag == 0){ - // // // if (curr_size == size){ - // // // size = size * 2; - // // // unique_memory_addresses = realloc(unique_memory_addresses, size * sizeof(int)); - // // // } - // // // unique_memory_addresses[curr_size] = atoi(token); - // // // curr_size = curr_size + 1; - // // // } - // // } - // } - // } - - // for (int i = 0; i < curr_size; i++){ - // printf("%i\n", unique_memory_addresses[i]); - // } - - // remove(mpiioMappingsPath); - // remove(posixMappingsPath); - } + char * exe_name = darshan_exe(); + + int line_mappings_index = 0; + + char address_line_mapping[4096] = {}; + + for (d = unique_mem_addr; d != NULL; d = (unique_stack_struct *)(d->hh.next)) { + + FILE *fp; + char cmd[256]; + char *line = NULL; + size_t len = 0; + + char addr[32]; + sprintf(addr, "%s", d->address); + + char *const args[] = { "/usr/bin/addr2line", "-a", addr, "-e", exe_name, NULL }; + + int pipe_fd[2]; + pid_t child_pid; + int status; + + if (pipe(pipe_fd) == -1) { + perror("pipe"); + // return 1; + } + int ret; + // Use posix_spawn to execute the command + posix_spawn_file_actions_t action; + posix_spawn_file_actions_init(&action); + posix_spawn_file_actions_addclose(&action, pipe_fd[0]); // Close the read end of the pipe + posix_spawn_file_actions_adddup2(&action, pipe_fd[1], STDOUT_FILENO); // Redirect stdout to the write end of the pipe + if (posix_spawn(&child_pid, "/usr/bin/addr2line", &action, NULL, args, NULL) == 0) { + // Close the write end of the pipe in the parent process + close(pipe_fd[1]); + + // Read the output from the pipe + char buffer[4096]; + ssize_t bytes_read; + while ((bytes_read = read(pipe_fd[0], buffer, sizeof(buffer))) > 0) { + fwrite(buffer, 1, bytes_read, stdout); + } + + // Wait for the child process to complete + waitpid(child_pid, &status, 0); + + char * token = strtok(buffer, "\n"); + token = strtok(NULL, "\n"); + sprintf(cmd, "%s, %s\n", buffer, token); + strcat(address_line_mapping, cmd); + } + + HASH_DEL(unique_mem_addr, d); + } + + strcpy(final_core->log_hdr_p->mpiio_line_mapping, address_line_mapping); + } } } #endif @@ -2919,36 +2927,6 @@ void *darshan_core_register_record( return(rec_buf);; } -void set_posix_line_mapping(char *mapping_array, bool isStackTrace){ - - if (isStackTrace){ - posix_line_mapping = (char *)calloc(strlen(mapping_array), sizeof(char)); - - for (int i=0; i < strlen(mapping_array); i++){ - posix_line_mapping[i] = mapping_array[i]; - } - } - else{ - posix_line_mapping = ""; - } - return; -} - -void set_mpiio_line_mapping(char *mapping_array, bool isStackTrace){ - - if (isStackTrace){ - mpiio_line_mapping = (char *)calloc(strlen(mapping_array), sizeof(char)); - - for (int i=0; i < strlen(mapping_array); i++){ - mpiio_line_mapping[i] = mapping_array[i]; - } - } - else{ - mpiio_line_mapping = ""; - } - return; -} - char *darshan_core_lookup_record_name(darshan_record_id rec_id) { struct darshan_core_name_record_ref *ref; @@ -3010,15 +2988,6 @@ char *darshan_exe() return exe_name; } -void get_log_file_path(char *path) -{ - - for (int i = 0; i < strlen(log_path_mappings); i++){ - path[i] = log_path_mappings[i]; - } - return; -} - /* * Local variables: * c-indent-level: 4 diff --git a/darshan-runtime/lib/darshan-dxt.c b/darshan-runtime/lib/darshan-dxt.c index 81314514e..c6d3068c6 100644 --- a/darshan-runtime/lib/darshan-dxt.c +++ b/darshan-runtime/lib/darshan-dxt.c @@ -62,8 +62,7 @@ typedef int64_t off64_t; #define STACK_TRACE_BUF_SIZE 60 bool isStackTrace = false; -char posixMappingsPath[1024]; -char mpiioMappingsPath[1024]; + /* The dxt_file_record_ref structure maintains necessary runtime metadata * for the DXT file record (dxt_file_record structure, defined in * darshan-dxt-log-format.h) pointed to by 'file_rec'. This metadata @@ -159,10 +158,6 @@ void dxt_posix_runtime_initialize() }; int ret; - // getcwd(posixMappingsPath, sizeof(posixMappingsPath)); - // char source[] = "/posix_mappings.txt"; - // strcat(posixMappingsPath, source); - /* register the DXT module with darshan core */ ret = darshan_core_register_module( DXT_POSIX_MOD, @@ -205,9 +200,6 @@ void dxt_mpiio_runtime_initialize() }; int ret; - // getcwd(mpiioMappingsPath, sizeof(mpiioMappingsPath)); - // char source[] = "/mpiio_mappings.txt"; - // strcat(mpiioMappingsPath, source); /* register the DXT module with darshan core */ ret = darshan_core_register_module( DXT_MPIIO_MOD, @@ -276,37 +268,6 @@ void dxt_posix_write(darshan_record_id rec_id, int64_t offset, rec_ref->write_traces[file_rec->write_count].end_time = end_time; if (isStackTrace){ int size = backtrace (rec_ref->write_traces[file_rec->write_count].address_array, STACK_TRACE_BUF_SIZE); - // JL I believe we can remove this since the symbols will help remove whatever is not from the application - //for (int i = size; i < STACK_TRACE_BUF_SIZE; i++){ - // rec_ref->write_traces[file_rec->write_count].address_array[i] = NULL; - //} - - /*char **strings; - strings = backtrace_symbols(rec_ref->write_traces[file_rec->write_count].address_array, size); - - if (strings != NULL) - for(int i = 0; i < size; i++) { - printf("%d: %p %s\n", - i, - (int)rec_ref->write_traces[file_rec->write_count].address_array[i], - strings[i] - ); - } - */ - // FILE *fptr; - // fptr = fopen(posixMappingsPath, "a+"); - - // char **strings;; - // strings = backtrace_symbols (rec_ref->write_traces[file_rec->write_count].address_array, size); - // if (strings != NULL) - // { - // for (int j = 0; j < size; j++){ - // if (strings[j] != NULL) - // fprintf(fptr, "%s\n", strings[j]); - // } - // } - - // fclose(fptr); rec_ref->write_traces[file_rec->write_count].noStackTrace = 1; rec_ref->write_traces[file_rec->write_count].size = size; } @@ -359,25 +320,6 @@ void dxt_posix_read(darshan_record_id rec_id, int64_t offset, rec_ref->read_traces[file_rec->read_count].end_time = end_time; if (isStackTrace){ int size = backtrace (rec_ref->read_traces[file_rec->read_count].address_array , STACK_TRACE_BUF_SIZE); - // JL removed as we can have the symbols do this for us - //for (int i = size; i < STACK_TRACE_BUF_SIZE; i++){ - // rec_ref->read_traces[file_rec->read_count].address_array[i] = NULL; - //} - - // FILE *fptr; - // fptr = fopen(posixMappingsPath, "a+"); - - // char **strings;; - // strings = backtrace_symbols (rec_ref->read_traces[file_rec->read_count].address_array, size); - // if (strings != NULL) - // { - // for (int j = 0; j < size; j++){ - // if (strings[j] != NULL) - // fprintf(fptr, "%s\n", strings[j]); - // } - // } - - // fclose(fptr); rec_ref->read_traces[file_rec->read_count].noStackTrace = 1; rec_ref->read_traces[file_rec->read_count].size = size; } @@ -429,23 +371,6 @@ void dxt_mpiio_write(darshan_record_id rec_id, int64_t offset, rec_ref->write_traces[file_rec->write_count].end_time = end_time; if (isStackTrace){ int size = backtrace (rec_ref->write_traces[file_rec->write_count].address_array, STACK_TRACE_BUF_SIZE); - //for (int i = size; i < STACK_TRACE_BUF_SIZE; i++){ - // rec_ref->write_traces[file_rec->write_count].address_array[i] = NULL; - //} - // FILE *fptr; - // fptr = fopen(mpiioMappingsPath, "a+"); - - // char **strings;; - // strings = backtrace_symbols (rec_ref->write_traces[file_rec->write_count].address_array, size); - // if (strings != NULL) - // { - // for (int j = 0; j < size; j++){ - // if (strings[j] != NULL) - // fprintf(fptr, "%s\n", strings[j]); - // } - // } - - // fclose(fptr); rec_ref->write_traces[file_rec->write_count].noStackTrace = 1; rec_ref->write_traces[file_rec->write_count].size = size; } @@ -498,23 +423,6 @@ void dxt_mpiio_read(darshan_record_id rec_id, int64_t offset, rec_ref->read_traces[file_rec->read_count].end_time = end_time; if (isStackTrace){ int size = backtrace (rec_ref->read_traces[file_rec->read_count].address_array , STACK_TRACE_BUF_SIZE); - //for (int i = size; i < STACK_TRACE_BUF_SIZE; i++){ - // rec_ref->read_traces[file_rec->read_count].address_array[i] = NULL; - //} - // FILE *fptr; - // fptr = fopen(mpiioMappingsPath, "a+"); - - // char **strings;; - // strings = backtrace_symbols (rec_ref->read_traces[file_rec->read_count].address_array, size); - // if (strings != NULL) - // { - // for (int j = 0; j < size; j++){ - // if (strings[j] != NULL) - // fprintf(fptr, "%s\n", strings[j]); - // } - // } - - // fclose(fptr); rec_ref->read_traces[file_rec->read_count].noStackTrace = 1; rec_ref->read_traces[file_rec->read_count].size = size; } @@ -919,41 +827,46 @@ static void dxt_serialize_posix_records(void *rec_ref_p, void *user_ptr) if (record_write_count == 0 && record_read_count == 0) return; - if (isStackTrace){ - // clock_t start, end; - // double cpu_time_used; - // start = clock(); - - // char *path = malloc(4096); - // get_log_file_path(path); - // char substr[256] = "posix_mappings.txt"; - // char * pch; - // pch=strchr(path,'.'); - - // int ind = strlen(path) - strlen(pch) + 1; - // for (int i = 0; i < strlen(substr); i++){ - // path[ind] = substr[i]; - // ind = ind + 1; - // } - // path[ind] ='\0'; - + if (isStackTrace){ char stack_file_name[50]; sprintf(stack_file_name, ".%d.darshan-posix", dxt_my_rank); FILE *fptr; fptr = fopen(stack_file_name, "w"); - /*for(int i = 0; i < record_write_count; i++){ + typedef struct { + void *address; /* key */ + UT_hash_handle hh; /* makes this structure hashable */ + } stack_struct; + + stack_struct *unique_mem_addr = NULL; + + char * exe_name = darshan_exe(); + for(int i = 0; i < record_write_count; i++){ char **strings; int size = rec_ref->write_traces[i].size; strings = backtrace_symbols (rec_ref->write_traces[i].address_array, size); if (strings != NULL) { for (int j = 0; j < size; j++){ - fwrite(fptr, "%s\n", strings[j]); + if (strstr(strings[j], exe_name) != NULL) { + stack_struct *d = NULL; + char * token = strtok(strings[j], "["); + token = strtok(NULL, "["); + token = strtok(token, "]"); + int number = (int)strtol(token, NULL, 16); + void *addr = number; + HASH_FIND_PTR(unique_mem_addr, &addr, d); + + if (!d) { + stack_struct *e = (stack_struct *) malloc(sizeof *e); + e->address = addr; + HASH_ADD_PTR(unique_mem_addr, address, e); + } + } } + free(strings); } - free(strings); } for(int i = 0; i < record_read_count; i++){ @@ -963,53 +876,37 @@ static void dxt_serialize_posix_records(void *rec_ref_p, void *user_ptr) if (strings != NULL) { for (int j = 0; j < size; j++){ - // printf("%s\n", strings[i]); - fprintf(fptr, "%s\n", strings[j]); - } - } - free(strings); - }*/ - - typedef struct { - void *address; /* key */ - UT_hash_handle hh; /* makes this structure hashable */ - } stack_struct; - - stack_struct *unique_mem_addr = NULL; - - for(int i = 0; i < record_write_count; i++){ - int size = rec_ref->write_traces[i].size; - - for (int j = 0; j < size; j++) { - stack_struct *d = NULL; - - void *addr = rec_ref->write_traces[i].address_array[j]; - // printf("looking for %p\n", addr); - HASH_FIND_PTR(unique_mem_addr, &addr, d); - - if (!d) { - //printf("not found\n"); - stack_struct *e = (stack_struct *) malloc(sizeof *e); - - e->address = addr; - - HASH_ADD_PTR(unique_mem_addr, address, e); + if (strstr(strings[j], exe_name) != NULL) { + stack_struct *d = NULL; + char * token = strtok(strings[j], "["); + token = strtok(NULL, "["); + token = strtok(token, "]"); + int number = (int)strtol(token, NULL, 16); + void *addr = number; + HASH_FIND_PTR(unique_mem_addr, &addr, d); + + if (!d) { + stack_struct *e = (stack_struct *) malloc(sizeof *e); + e->address = addr; + HASH_ADD_PTR(unique_mem_addr, address, e); + } + } } + free(strings); } } stack_struct *d = NULL; for (d = unique_mem_addr; d != NULL; d = (stack_struct *)(d->hh.next)) { - //printf("unique-> %p\n", d->address); fprintf(fptr, "%p\n", d->address); - HASH_DEL(unique_mem_addr, d); } //fflush(fptr); fclose(fptr); } + /* * Buffer format: * dxt_file_record + write_traces + read_traces @@ -1103,26 +1000,21 @@ static void dxt_serialize_mpiio_records(void *rec_ref_p, void *user_ptr) if (record_write_count == 0 && record_read_count == 0) return; - if (isStackTrace){ - // clock_t start, end; - // double cpu_time_used; - // start = clock(); - - // char *path = malloc(4096); - // get_log_file_path(path); - // char substr[256] = "/tmp/mpiio_mappings.txt"; - // char * pch; - // pch=strchr(path,'.'); - - // int ind = strlen(path) - strlen(pch) + 1; - // for (int i = 0; i < strlen(substr); i++){ - // path[ind] = substr[i]; - // ind = ind + 1; - // } - // path[ind] ='\0'; - - /*FILE *fptr; - fptr = fopen("/tmp/mpiio_mappings.txt", "a"); + if (isStackTrace){ + char stack_file_name[50]; + sprintf(stack_file_name, ".%d.darshan-mpiio", dxt_my_rank); + + FILE *fptr; + fptr = fopen(stack_file_name, "w"); + + typedef struct { + void *address; /* key */ + UT_hash_handle hh; /* makes this structure hashable */ + } stack_struct; + + stack_struct *unique_mem_addr = NULL; + + char * exe_name = darshan_exe(); for(int i = 0; i < record_write_count; i++){ char **strings; @@ -1131,23 +1023,62 @@ static void dxt_serialize_mpiio_records(void *rec_ref_p, void *user_ptr) if (strings != NULL) { for (int j = 0; j < size; j++){ - fprintf(fptr, "%s\n", strings[j]); + if (strstr(strings[j], exe_name) != NULL) { + stack_struct *d = NULL; + char * token = strtok(strings[j], "["); + token = strtok(NULL, "["); + token = strtok(token, "]"); + int number = (int)strtol(token, NULL, 16); + void *addr = number; + HASH_FIND_PTR(unique_mem_addr, &addr, d); + + if (!d) { + stack_struct *e = (stack_struct *) malloc(sizeof *e); + e->address = addr; + HASH_ADD_PTR(unique_mem_addr, address, e); + } + } } + free(strings); } } - for(int i = 0; i < record_read_count; i++){ + for(int i = 0; i < record_read_count; i++){ char **strings; int size = rec_ref->read_traces[i].size; strings = backtrace_symbols (rec_ref->read_traces[i].address_array, size); if (strings != NULL) { for (int j = 0; j < size; j++){ - fprintf(fptr, "%s\n", strings[j]); + if (strstr(strings[j], exe_name) != NULL) { + stack_struct *d = NULL; + char * token = strtok(strings[j], "["); + token = strtok(NULL, "["); + token = strtok(token, "]"); + int number = (int)strtol(token, NULL, 16); + void *addr = number; + HASH_FIND_PTR(unique_mem_addr, &addr, d); + + if (!d) { + stack_struct *e = (stack_struct *) malloc(sizeof *e); + e->address = addr; + HASH_ADD_PTR(unique_mem_addr, address, e); + } + } } + free(strings); } - }*/ - //fclose(fptr); + } + + stack_struct *d = NULL; + + for (d = unique_mem_addr; d != NULL; d = (stack_struct *)(d->hh.next)) { + fprintf(fptr, "%p\n", d->address); + HASH_DEL(unique_mem_addr, d); + } + + //fflush(fptr); + fclose(fptr); } /* * Buffer format: diff --git a/darshan-util/pydarshan/darshan/backend/api_def_c.py b/darshan-util/pydarshan/darshan/backend/api_def_c.py index 46bd33104..7a9179d96 100644 --- a/darshan-util/pydarshan/darshan/backend/api_def_c.py +++ b/darshan-util/pydarshan/darshan/backend/api_def_c.py @@ -221,6 +221,7 @@ double end_time; void *address_array[STACK_TRACE_BUF_SIZE]; int noStackTrace; + int size; } segment_info; /* counter names */ diff --git a/darshan-util/pydarshan/darshan/backend/cffi_backend.py b/darshan-util/pydarshan/darshan/backend/cffi_backend.py index 96216f0c2..7254ab10c 100644 --- a/darshan-util/pydarshan/darshan/backend/cffi_backend.py +++ b/darshan-util/pydarshan/darshan/backend/cffi_backend.py @@ -42,6 +42,8 @@ except: pass +flagPOSIX = False +flagMPIIO = False API_def_c = load_darshan_header(addins) ffi = cffi.FFI() @@ -54,6 +56,7 @@ logfilename = None + _mod_names = [ "NULL", "POSIX", @@ -658,51 +661,50 @@ def log_get_dxt_record(log, mod_name, reads=True, writes=True, dtype='dict'): size_of = ffi.sizeof("struct darshan_fd_s") address_line_mapping = ffi.cast("struct darshan_fd_s *", log['handle']) + global flagPOSIX + global flagMPIIO if mod_name == 'DXT_POSIX': - rec['posix_address_line_mapping'] = [] - data = ffi.string(address_line_mapping.posix_line_mapping) - data = data.decode('utf-8') - data = data.split('\n') - index = 0 - address = "" - function_name = "" - for item in data: - if item: - if index == 0: - address = item - index = 1 - elif index == 1: - func_line = item + if flagPOSIX == False: + flagPOSIX = True + rec['address_line_mapping'] = [] + data = ffi.string(address_line_mapping.posix_line_mapping) + data = data.decode('utf-8') + data = data.split('\n') + for item in data: + if item: + item = item.split(",") + address = item[0] + + func_line = item[1] func_line = func_line.split(":") function_name = func_line[0] line_number = func_line[1] - index = 0 mapping = { "address": address, "function_name": function_name, "line_number": line_number } - rec['posix_address_line_mapping'].append(mapping) + + rec['address_line_mapping'].append(mapping) + else: + rec['address_line_mapping'] = {} elif mod_name == 'DXT_MPIIO': - rec['mpiio_address_line_mapping'] = [] - data = ffi.string(address_line_mapping.mpiio_line_mapping) - data = data.decode('utf-8') - data = data.split('\n') - index = 0 - address = "" - function_name = "" - for item in data: - if item: - if index == 0: - address = item - index = 1 - elif index == 1: - func_line = item + if flagMPIIO == False: + flagMPIIO = True + rec['address_line_mapping'] = [] + data = ffi.string(address_line_mapping.mpiio_line_mapping) + data = data.decode('utf-8') + data = data.split('\n') + for item in data: + if item: + item = item.split(",") + address = item[0] + + func_line = item[1] func_line = func_line.split(":") function_name = func_line[0] line_number = func_line[1] - index = 0 mapping = { "address": address, @@ -710,7 +712,9 @@ def log_get_dxt_record(log, mod_name, reads=True, writes=True, dtype='dict'): "line_number": line_number } - rec['mpiio_address_line_mapping'].append(mapping) + rec['address_line_mapping'].append(mapping) + else: + rec['address_line_mapping'] = {} libdutil.darshan_free(buf[0]) return rec diff --git a/darshan-util/pydarshan/darshan/report.py b/darshan-util/pydarshan/darshan/report.py index 9cb2e4c08..5676f71f2 100644 --- a/darshan-util/pydarshan/darshan/report.py +++ b/darshan-util/pydarshan/darshan/report.py @@ -269,9 +269,9 @@ def to_df(self, attach="default"): rec['read_segments'] = pd.DataFrame(rec['read_segments']) rec['write_segments'] = pd.DataFrame(rec['write_segments']) if mod == 'DXT_POSIX': - rec['posix_address_line_mapping'] = pd.DataFrame(rec['posix_address_line_mapping']) + rec['address_line_mapping'] = pd.DataFrame(rec['address_line_mapping']) elif mod == 'DXT_MPIIO': - rec['mpiio_address_line_mapping'] = pd.DataFrame(rec['mpiio_address_line_mapping']) + rec['address_line_mapping'] = pd.DataFrame(rec['address_line_mapping']) else: df_recs = pd.DataFrame.from_records(records) # generic records have counter and fcounter arrays to collect diff --git a/include/darshan-dxt-log-format.h b/include/darshan-dxt-log-format.h index 7871c2016..2468761d1 100644 --- a/include/darshan-dxt-log-format.h +++ b/include/darshan-dxt-log-format.h @@ -27,7 +27,6 @@ typedef struct segment_info { void *address_array[STACK_TRACE_BUF_SIZE]; int noStackTrace; int size; - void *address_symbols_array[STACK_TRACE_BUF_SIZE]; } segment_info; #define X(a) a, From 54c3aae5b8e2ac012164fde354613e34b4aefaf9 Mon Sep 17 00:00:00 2001 From: hammad45 Date: Mon, 25 Sep 2023 12:09:33 -0700 Subject: [PATCH 07/10] Fixed rank 0 data missing bug --- darshan-runtime/lib/darshan-core.c | 19 ++++++++++++------- darshan-runtime/lib/darshan-dxt.c | 13 +++++++++---- .../pydarshan/darshan/backend/cffi_backend.py | 6 +++--- 3 files changed, 24 insertions(+), 14 deletions(-) diff --git a/darshan-runtime/lib/darshan-core.c b/darshan-runtime/lib/darshan-core.c index a350b6a96..8863a8b42 100644 --- a/darshan-runtime/lib/darshan-core.c +++ b/darshan-runtime/lib/darshan-core.c @@ -77,7 +77,8 @@ static struct darshan_core_mnt_data mnt_data_array[DARSHAN_MAX_MNTS]; static int mnt_data_count = 0; static char *exe_name = ""; - +bool processedPOSIX = false; +bool processedMPIIO = false; #ifdef DARSHAN_BGQ extern void bgq_runtime_initialize(); #endif @@ -683,7 +684,8 @@ void darshan_core_shutdown(int write_log) //if(i == DXT_POSIX_MOD && my_rank == 0 && final_core->config.stack_trace_trigger) if (i == DXT_POSIX_MOD) { PMPI_Barrier(MPI_COMM_WORLD); - if (my_rank == 0 && final_core->config.stack_trace_trigger) { + if (my_rank == 0 && final_core->config.stack_trace_trigger && processedPOSIX == false) { + processedPOSIX = true; FILE *fptr; typedef struct { @@ -734,8 +736,8 @@ void darshan_core_shutdown(int write_log) // sprintf(cmd, "addr2line -a %s -e %s", d->address, exe_name); char addr[32]; - sprintf(addr, "%s", d->address); - + sprintf(addr, "%s", d->address); + char *const args[] = { "/usr/bin/addr2line", "-a", addr, "-e", exe_name, NULL }; int pipe_fd[2]; @@ -767,7 +769,8 @@ void darshan_core_shutdown(int write_log) char * token = strtok(buffer, "\n"); token = strtok(NULL, "\n"); - sprintf(cmd, "%s, %s\n", buffer, token); + int number = (int)strtol(buffer, NULL, 16); + sprintf(cmd, "%p, %s\n", number, token); strcat(address_line_mapping, cmd); } HASH_DEL(unique_mem_addr, d); @@ -778,7 +781,8 @@ void darshan_core_shutdown(int write_log) } else if (i == DXT_MPIIO_MOD) { PMPI_Barrier(MPI_COMM_WORLD); - if (my_rank == 0 && final_core->config.stack_trace_trigger) { + if (my_rank == 0 && final_core->config.stack_trace_trigger && processedMPIIO == false) { + processedMPIIO = true; FILE *fptr; typedef struct { @@ -866,7 +870,8 @@ void darshan_core_shutdown(int write_log) char * token = strtok(buffer, "\n"); token = strtok(NULL, "\n"); - sprintf(cmd, "%s, %s\n", buffer, token); + int number = (int)strtol(buffer, NULL, 16); + sprintf(cmd, "%p, %s\n", number, token); strcat(address_line_mapping, cmd); } diff --git a/darshan-runtime/lib/darshan-dxt.c b/darshan-runtime/lib/darshan-dxt.c index c6d3068c6..07c62b111 100644 --- a/darshan-runtime/lib/darshan-dxt.c +++ b/darshan-runtime/lib/darshan-dxt.c @@ -62,6 +62,8 @@ typedef int64_t off64_t; #define STACK_TRACE_BUF_SIZE 60 bool isStackTrace = false; +bool processedBeforePOSIX = false; +bool processedBeforeMPIIO = false; /* The dxt_file_record_ref structure maintains necessary runtime metadata * for the DXT file record (dxt_file_record structure, defined in @@ -827,12 +829,13 @@ static void dxt_serialize_posix_records(void *rec_ref_p, void *user_ptr) if (record_write_count == 0 && record_read_count == 0) return; - if (isStackTrace){ + if (isStackTrace && processedBeforePOSIX == false){ + processedBeforePOSIX = true; char stack_file_name[50]; sprintf(stack_file_name, ".%d.darshan-posix", dxt_my_rank); FILE *fptr; - fptr = fopen(stack_file_name, "w"); + fptr = fopen(stack_file_name, "a+"); typedef struct { void *address; /* key */ @@ -898,6 +901,7 @@ static void dxt_serialize_posix_records(void *rec_ref_p, void *user_ptr) stack_struct *d = NULL; + for (d = unique_mem_addr; d != NULL; d = (stack_struct *)(d->hh.next)) { fprintf(fptr, "%p\n", d->address); HASH_DEL(unique_mem_addr, d); @@ -1000,12 +1004,13 @@ static void dxt_serialize_mpiio_records(void *rec_ref_p, void *user_ptr) if (record_write_count == 0 && record_read_count == 0) return; - if (isStackTrace){ + if (isStackTrace && processedBeforeMPIIO == false){ + processedBeforeMPIIO = true; char stack_file_name[50]; sprintf(stack_file_name, ".%d.darshan-mpiio", dxt_my_rank); FILE *fptr; - fptr = fopen(stack_file_name, "w"); + fptr = fopen(stack_file_name, "a+"); typedef struct { void *address; /* key */ diff --git a/darshan-util/pydarshan/darshan/backend/cffi_backend.py b/darshan-util/pydarshan/darshan/backend/cffi_backend.py index 7254ab10c..074189d84 100644 --- a/darshan-util/pydarshan/darshan/backend/cffi_backend.py +++ b/darshan-util/pydarshan/darshan/backend/cffi_backend.py @@ -97,7 +97,7 @@ def mod_name_to_idx(mod_name): "APMPI-PERF": "struct darshan_apmpi_perf_record **", } -STACK_TRACE_BUF_SIZE = 30 +STACK_TRACE_BUF_SIZE = 60 def get_lib_version(): """ @@ -626,7 +626,7 @@ def log_get_dxt_record(log, mod_name, reads=True, writes=True, dtype='dict'): addr = str(segments[i].address_array[j]) addr = addr.split("'void *' ") addr = addr[1].split(">") - seg_array.append(addr[0]) + seg_array.append(str(addr[0])) seg["stack_memory_addresses"] = seg_array rec['write_segments'].append(seg) @@ -647,7 +647,7 @@ def log_get_dxt_record(log, mod_name, reads=True, writes=True, dtype='dict'): addr = str(segments[i].address_array[j]) addr = addr.split("'void *' ") addr = addr[1].split(">") - seg_array.append(addr[0]) + seg_array.append(str(addr[0])) seg["stack_memory_addresses"] = seg_array rec['read_segments'].append(seg) From 6d59cf9a1511f62ccf886d9208a3c6e92bc32bc0 Mon Sep 17 00:00:00 2001 From: hammad45 Date: Mon, 25 Sep 2023 13:07:04 -0700 Subject: [PATCH 08/10] Bug fix --- darshan-runtime/lib/darshan-core.c | 8 ++------ darshan-runtime/lib/darshan-dxt.c | 9 ++------- 2 files changed, 4 insertions(+), 13 deletions(-) diff --git a/darshan-runtime/lib/darshan-core.c b/darshan-runtime/lib/darshan-core.c index 8863a8b42..cc8817e2b 100644 --- a/darshan-runtime/lib/darshan-core.c +++ b/darshan-runtime/lib/darshan-core.c @@ -77,8 +77,6 @@ static struct darshan_core_mnt_data mnt_data_array[DARSHAN_MAX_MNTS]; static int mnt_data_count = 0; static char *exe_name = ""; -bool processedPOSIX = false; -bool processedMPIIO = false; #ifdef DARSHAN_BGQ extern void bgq_runtime_initialize(); #endif @@ -684,8 +682,7 @@ void darshan_core_shutdown(int write_log) //if(i == DXT_POSIX_MOD && my_rank == 0 && final_core->config.stack_trace_trigger) if (i == DXT_POSIX_MOD) { PMPI_Barrier(MPI_COMM_WORLD); - if (my_rank == 0 && final_core->config.stack_trace_trigger && processedPOSIX == false) { - processedPOSIX = true; + if (my_rank == 0 && final_core->config.stack_trace_trigger) { FILE *fptr; typedef struct { @@ -781,8 +778,7 @@ void darshan_core_shutdown(int write_log) } else if (i == DXT_MPIIO_MOD) { PMPI_Barrier(MPI_COMM_WORLD); - if (my_rank == 0 && final_core->config.stack_trace_trigger && processedMPIIO == false) { - processedMPIIO = true; + if (my_rank == 0 && final_core->config.stack_trace_trigger) { FILE *fptr; typedef struct { diff --git a/darshan-runtime/lib/darshan-dxt.c b/darshan-runtime/lib/darshan-dxt.c index 07c62b111..eafe44d67 100644 --- a/darshan-runtime/lib/darshan-dxt.c +++ b/darshan-runtime/lib/darshan-dxt.c @@ -62,9 +62,6 @@ typedef int64_t off64_t; #define STACK_TRACE_BUF_SIZE 60 bool isStackTrace = false; -bool processedBeforePOSIX = false; -bool processedBeforeMPIIO = false; - /* The dxt_file_record_ref structure maintains necessary runtime metadata * for the DXT file record (dxt_file_record structure, defined in * darshan-dxt-log-format.h) pointed to by 'file_rec'. This metadata @@ -829,8 +826,7 @@ static void dxt_serialize_posix_records(void *rec_ref_p, void *user_ptr) if (record_write_count == 0 && record_read_count == 0) return; - if (isStackTrace && processedBeforePOSIX == false){ - processedBeforePOSIX = true; + if (isStackTrace){ char stack_file_name[50]; sprintf(stack_file_name, ".%d.darshan-posix", dxt_my_rank); @@ -1004,8 +1000,7 @@ static void dxt_serialize_mpiio_records(void *rec_ref_p, void *user_ptr) if (record_write_count == 0 && record_read_count == 0) return; - if (isStackTrace && processedBeforeMPIIO == false){ - processedBeforeMPIIO = true; + if (isStackTrace){ char stack_file_name[50]; sprintf(stack_file_name, ".%d.darshan-mpiio", dxt_my_rank); From 6825e76b9094489d3a86a6e66de824ce1ed2e28e Mon Sep 17 00:00:00 2001 From: hammad45 Date: Tue, 28 Nov 2023 10:35:09 -0800 Subject: [PATCH 09/10] Updated Backtrace Code --- darshan-runtime/lib/darshan-core.c | 19 ++++------ darshan-runtime/lib/darshan-dxt.c | 59 ++---------------------------- darshan-runtime/lib/darshan-hdf5.c | 3 +- 3 files changed, 13 insertions(+), 68 deletions(-) diff --git a/darshan-runtime/lib/darshan-core.c b/darshan-runtime/lib/darshan-core.c index cc8817e2b..51beedbc1 100644 --- a/darshan-runtime/lib/darshan-core.c +++ b/darshan-runtime/lib/darshan-core.c @@ -54,8 +54,6 @@ #include #endif -#define STACK_TRACE_BUF_SIZE 60 - extern char* __progname; extern char* __progname_full; struct darshan_core_runtime *__darshan_core = NULL; @@ -675,11 +673,11 @@ void darshan_core_shutdown(int write_log) /* get the final output buffer */ this_mod->mod_funcs.mod_output_func(&mod_buf, &mod_buf_sz); } - + +/* Code added by Hammad Ather (hather@lbl.gov) and Jean Luca Bez (jlbez@lbl.gov) */ #ifdef HAVE_MPI if(using_mpi) { - //if(i == DXT_POSIX_MOD && my_rank == 0 && final_core->config.stack_trace_trigger) if (i == DXT_POSIX_MOD) { PMPI_Barrier(MPI_COMM_WORLD); if (my_rank == 0 && final_core->config.stack_trace_trigger) { @@ -715,8 +713,6 @@ void darshan_core_shutdown(int write_log) fclose(fptr); remove(stack_file_name_posix); - } else { - printf("unable to open POSIX file\n"); } } @@ -731,7 +727,6 @@ void darshan_core_shutdown(int write_log) char *line = NULL; size_t len = 0; - // sprintf(cmd, "addr2line -a %s -e %s", d->address, exe_name); char addr[32]; sprintf(addr, "%s", d->address); @@ -760,8 +755,10 @@ void darshan_core_shutdown(int write_log) // Read the output from the pipe char buffer[4096]; ssize_t bytes_read; + FILE* debug; + debug = fopen("/dev/null", "w"); while ((bytes_read = read(pipe_fd[0], buffer, sizeof(buffer))) > 0) { - fwrite(buffer, 1, bytes_read, stdout); + fwrite(buffer, 1, bytes_read, debug); } char * token = strtok(buffer, "\n"); @@ -811,8 +808,6 @@ void darshan_core_shutdown(int write_log) fclose(fptr); remove(stack_file_name_mpiio); - } else { - printf("unable to open MPIIO file\n"); } } @@ -857,8 +852,10 @@ void darshan_core_shutdown(int write_log) // Read the output from the pipe char buffer[4096]; ssize_t bytes_read; + FILE* debug; + debug = fopen("/dev/null", "w"); while ((bytes_read = read(pipe_fd[0], buffer, sizeof(buffer))) > 0) { - fwrite(buffer, 1, bytes_read, stdout); + fwrite(buffer, 1, bytes_read, debug); } // Wait for the child process to complete diff --git a/darshan-runtime/lib/darshan-dxt.c b/darshan-runtime/lib/darshan-dxt.c index eafe44d67..40d096c38 100644 --- a/darshan-runtime/lib/darshan-dxt.c +++ b/darshan-runtime/lib/darshan-dxt.c @@ -838,7 +838,6 @@ static void dxt_serialize_posix_records(void *rec_ref_p, void *user_ptr) UT_hash_handle hh; /* makes this structure hashable */ } stack_struct; - stack_struct *unique_mem_addr = NULL; char * exe_name = darshan_exe(); for(int i = 0; i < record_write_count; i++){ @@ -849,19 +848,11 @@ static void dxt_serialize_posix_records(void *rec_ref_p, void *user_ptr) { for (int j = 0; j < size; j++){ if (strstr(strings[j], exe_name) != NULL) { - stack_struct *d = NULL; char * token = strtok(strings[j], "["); token = strtok(NULL, "["); token = strtok(token, "]"); int number = (int)strtol(token, NULL, 16); - void *addr = number; - HASH_FIND_PTR(unique_mem_addr, &addr, d); - - if (!d) { - stack_struct *e = (stack_struct *) malloc(sizeof *e); - e->address = addr; - HASH_ADD_PTR(unique_mem_addr, address, e); - } + fprintf(fptr, "%p\n", number); } } free(strings); @@ -876,34 +867,17 @@ static void dxt_serialize_posix_records(void *rec_ref_p, void *user_ptr) { for (int j = 0; j < size; j++){ if (strstr(strings[j], exe_name) != NULL) { - stack_struct *d = NULL; char * token = strtok(strings[j], "["); token = strtok(NULL, "["); token = strtok(token, "]"); int number = (int)strtol(token, NULL, 16); - void *addr = number; - HASH_FIND_PTR(unique_mem_addr, &addr, d); - - if (!d) { - stack_struct *e = (stack_struct *) malloc(sizeof *e); - e->address = addr; - HASH_ADD_PTR(unique_mem_addr, address, e); - } + fprintf(fptr, "%p\n", number); } } free(strings); } } - stack_struct *d = NULL; - - - for (d = unique_mem_addr; d != NULL; d = (stack_struct *)(d->hh.next)) { - fprintf(fptr, "%p\n", d->address); - HASH_DEL(unique_mem_addr, d); - } - - //fflush(fptr); fclose(fptr); } @@ -1012,7 +986,6 @@ static void dxt_serialize_mpiio_records(void *rec_ref_p, void *user_ptr) UT_hash_handle hh; /* makes this structure hashable */ } stack_struct; - stack_struct *unique_mem_addr = NULL; char * exe_name = darshan_exe(); @@ -1024,19 +997,11 @@ static void dxt_serialize_mpiio_records(void *rec_ref_p, void *user_ptr) { for (int j = 0; j < size; j++){ if (strstr(strings[j], exe_name) != NULL) { - stack_struct *d = NULL; char * token = strtok(strings[j], "["); token = strtok(NULL, "["); token = strtok(token, "]"); int number = (int)strtol(token, NULL, 16); - void *addr = number; - HASH_FIND_PTR(unique_mem_addr, &addr, d); - - if (!d) { - stack_struct *e = (stack_struct *) malloc(sizeof *e); - e->address = addr; - HASH_ADD_PTR(unique_mem_addr, address, e); - } + fprintf(fptr, "%p\n", number); } } free(strings); @@ -1051,33 +1016,17 @@ static void dxt_serialize_mpiio_records(void *rec_ref_p, void *user_ptr) { for (int j = 0; j < size; j++){ if (strstr(strings[j], exe_name) != NULL) { - stack_struct *d = NULL; char * token = strtok(strings[j], "["); token = strtok(NULL, "["); token = strtok(token, "]"); int number = (int)strtol(token, NULL, 16); - void *addr = number; - HASH_FIND_PTR(unique_mem_addr, &addr, d); - - if (!d) { - stack_struct *e = (stack_struct *) malloc(sizeof *e); - e->address = addr; - HASH_ADD_PTR(unique_mem_addr, address, e); - } + fprintf(fptr, "%p\n", number); } } free(strings); } } - stack_struct *d = NULL; - - for (d = unique_mem_addr; d != NULL; d = (stack_struct *)(d->hh.next)) { - fprintf(fptr, "%p\n", d->address); - HASH_DEL(unique_mem_addr, d); - } - - //fflush(fptr); fclose(fptr); } /* diff --git a/darshan-runtime/lib/darshan-hdf5.c b/darshan-runtime/lib/darshan-hdf5.c index 868581d80..2b6fb95f7 100644 --- a/darshan-runtime/lib/darshan-hdf5.c +++ b/darshan-runtime/lib/darshan-hdf5.c @@ -57,7 +57,6 @@ DARSHAN_FORWARD_DECL(H5Oopen_by_token, hid_t, (hid_t loc_id, H5O_token_t token)) #endif DARSHAN_FORWARD_DECL(H5Oclose, herr_t, (hid_t object_id)); - /* structure that can track i/o stats for a given HDF5 file record at runtime */ struct hdf5_file_record_ref { @@ -2055,4 +2054,4 @@ static void hdf5_dataset_cleanup() * End: * * vim: ts=8 sts=4 sw=4 expandtab - */ + */ \ No newline at end of file From 45f0f5202692f36154105f2a351c6f5877ed0737 Mon Sep 17 00:00:00 2001 From: hammad45 Date: Tue, 28 Nov 2023 10:46:04 -0800 Subject: [PATCH 10/10] Updated Backtrace Code --- darshan-runtime/lib/darshan-core.c | 2 +- darshan-runtime/lib/darshan-dxt.c | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/darshan-runtime/lib/darshan-core.c b/darshan-runtime/lib/darshan-core.c index 51beedbc1..1729c185c 100644 --- a/darshan-runtime/lib/darshan-core.c +++ b/darshan-runtime/lib/darshan-core.c @@ -39,7 +39,7 @@ #include #include -#ifdef HAVE_/global/homes/h/hather/darshan/darshan-runtime/install/libMPI +#ifdef HAVE_MPI #include #endif diff --git a/darshan-runtime/lib/darshan-dxt.c b/darshan-runtime/lib/darshan-dxt.c index 40d096c38..b585c1b86 100644 --- a/darshan-runtime/lib/darshan-dxt.c +++ b/darshan-runtime/lib/darshan-dxt.c @@ -265,6 +265,8 @@ void dxt_posix_write(darshan_record_id rec_id, int64_t offset, rec_ref->write_traces[file_rec->write_count].length = length; rec_ref->write_traces[file_rec->write_count].start_time = start_time; rec_ref->write_traces[file_rec->write_count].end_time = end_time; + + /* Code added by Hammad Ather (hather@lbl.gov) and Jean Luca Bez (jlbez@lbl.gov) */ if (isStackTrace){ int size = backtrace (rec_ref->write_traces[file_rec->write_count].address_array, STACK_TRACE_BUF_SIZE); rec_ref->write_traces[file_rec->write_count].noStackTrace = 1; @@ -317,6 +319,7 @@ void dxt_posix_read(darshan_record_id rec_id, int64_t offset, rec_ref->read_traces[file_rec->read_count].length = length; rec_ref->read_traces[file_rec->read_count].start_time = start_time; rec_ref->read_traces[file_rec->read_count].end_time = end_time; + /* Code added by Hammad Ather (hather@lbl.gov) and Jean Luca Bez (jlbez@lbl.gov) */ if (isStackTrace){ int size = backtrace (rec_ref->read_traces[file_rec->read_count].address_array , STACK_TRACE_BUF_SIZE); rec_ref->read_traces[file_rec->read_count].noStackTrace = 1; @@ -368,6 +371,7 @@ void dxt_mpiio_write(darshan_record_id rec_id, int64_t offset, rec_ref->write_traces[file_rec->write_count].offset = offset; rec_ref->write_traces[file_rec->write_count].start_time = start_time; rec_ref->write_traces[file_rec->write_count].end_time = end_time; + /* Code added by Hammad Ather (hather@lbl.gov) and Jean Luca Bez (jlbez@lbl.gov) */ if (isStackTrace){ int size = backtrace (rec_ref->write_traces[file_rec->write_count].address_array, STACK_TRACE_BUF_SIZE); rec_ref->write_traces[file_rec->write_count].noStackTrace = 1; @@ -420,6 +424,7 @@ void dxt_mpiio_read(darshan_record_id rec_id, int64_t offset, rec_ref->read_traces[file_rec->read_count].offset = offset; rec_ref->read_traces[file_rec->read_count].start_time = start_time; rec_ref->read_traces[file_rec->read_count].end_time = end_time; + /* Code added by Hammad Ather (hather@lbl.gov) and Jean Luca Bez (jlbez@lbl.gov) */ if (isStackTrace){ int size = backtrace (rec_ref->read_traces[file_rec->read_count].address_array , STACK_TRACE_BUF_SIZE); rec_ref->read_traces[file_rec->read_count].noStackTrace = 1; @@ -826,6 +831,7 @@ static void dxt_serialize_posix_records(void *rec_ref_p, void *user_ptr) if (record_write_count == 0 && record_read_count == 0) return; + /* Code added by Hammad Ather (hather@lbl.gov) and Jean Luca Bez (jlbez@lbl.gov) */ if (isStackTrace){ char stack_file_name[50]; sprintf(stack_file_name, ".%d.darshan-posix", dxt_my_rank); @@ -974,6 +980,7 @@ static void dxt_serialize_mpiio_records(void *rec_ref_p, void *user_ptr) if (record_write_count == 0 && record_read_count == 0) return; + /* Code added by Hammad Ather (hather@lbl.gov) and Jean Luca Bez (jlbez@lbl.gov) */ if (isStackTrace){ char stack_file_name[50]; sprintf(stack_file_name, ".%d.darshan-mpiio", dxt_my_rank);