diff --git a/darshan-util/pydarshan/darshan/experimental/plots/plot_access_histogram.py b/darshan-util/pydarshan/darshan/experimental/plots/plot_access_histogram.py index cfb8c75e4..4d6b28477 100644 --- a/darshan-util/pydarshan/darshan/experimental/plots/plot_access_histogram.py +++ b/darshan-util/pydarshan/darshan/experimental/plots/plot_access_histogram.py @@ -18,12 +18,12 @@ def autolabel(ax, rects): rotation=45, ) -def plot_access_histogram(report, mod, ax=None): +def plot_access_histogram(record, mod, ax=None): """ Plots a histogram of access sizes for specified module. Args: - report (darshan.DarshanReport): report to generate plot from + record: a dictionary with 2 separate DataFrames: 'counters' and 'fcounters' mod (str): mod-string for which to generate access_histogram """ @@ -33,44 +33,42 @@ def plot_access_histogram(report, mod, ax=None): else: fig = None - # TODO: change to report.summary - if 'mod_agg_iohist' in dir(report): - report.mod_agg_iohist(mod) - else: - print("Cannot create summary, mod_agg_iohist aggregator is not registered with the report class.") # defaults labels = ['0-100', '101-1K', '1K-10K', '10K-100K', '100K-1M', '1M-4M', '4M-10M', '10M-100M', '100M-1G', '1G+'] - agg = report.summary['agg_iohist'][mod] - # TODO: can simplify the read/write vals below after - # support for python 3.6 is dropped + counters=record['counters'] + if mod == 'MPI-IO': + rd_counter_prefix = f'MPIIO_SIZE_READ_AGG_' + wr_counter_prefix = f'MPIIO_SIZE_WRITE_AGG_' + else: + rd_counter_prefix = f'{mod}_SIZE_READ_' + wr_counter_prefix = f'{mod}_SIZE_WRITE_' read_vals = [ - agg['READ_0_100'], - agg['READ_100_1K'], - agg['READ_1K_10K'], - agg['READ_10K_100K'], - agg['READ_100K_1M'], - agg['READ_1M_4M'], - agg['READ_4M_10M'], - agg['READ_10M_100M'], - agg['READ_100M_1G'], - agg['READ_1G_PLUS'] + counters[f'{rd_counter_prefix}0_100'][0], + counters[f'{rd_counter_prefix}100_1K'][0], + counters[f'{rd_counter_prefix}1K_10K'][0], + 
counters[f'{rd_counter_prefix}10K_100K'][0], + counters[f'{rd_counter_prefix}100K_1M'][0], + counters[f'{rd_counter_prefix}1M_4M'][0], + counters[f'{rd_counter_prefix}4M_10M'][0], + counters[f'{rd_counter_prefix}10M_100M'][0], + counters[f'{rd_counter_prefix}100M_1G'][0], + counters[f'{rd_counter_prefix}1G_PLUS'][0] ] - write_vals = [ - agg['WRITE_0_100'], - agg['WRITE_100_1K'], - agg['WRITE_1K_10K'], - agg['WRITE_10K_100K'], - agg['WRITE_100K_1M'], - agg['WRITE_1M_4M'], - agg['WRITE_4M_10M'], - agg['WRITE_10M_100M'], - agg['WRITE_100M_1G'], - agg['WRITE_1G_PLUS'] + counters[f'{wr_counter_prefix}0_100'][0], + counters[f'{wr_counter_prefix}100_1K'][0], + counters[f'{wr_counter_prefix}1K_10K'][0], + counters[f'{wr_counter_prefix}10K_100K'][0], + counters[f'{wr_counter_prefix}100K_1M'][0], + counters[f'{wr_counter_prefix}1M_4M'][0], + counters[f'{wr_counter_prefix}4M_10M'][0], + counters[f'{wr_counter_prefix}10M_100M'][0], + counters[f'{wr_counter_prefix}100M_1G'][0], + counters[f'{wr_counter_prefix}1G_PLUS'][0] ] - + #TODO: add support for HDF5/PnetCDF modules x = np.arange(len(labels)) # the label locations width = 0.35 # the width of the bars diff --git a/darshan-util/pydarshan/darshan/experimental/plots/plot_common_access_table.py b/darshan-util/pydarshan/darshan/experimental/plots/plot_common_access_table.py index 6a1d50bb1..b20171789 100644 --- a/darshan-util/pydarshan/darshan/experimental/plots/plot_common_access_table.py +++ b/darshan-util/pydarshan/darshan/experimental/plots/plot_common_access_table.py @@ -83,7 +83,7 @@ def get_access_count_df(mod_df: Any, mod: str) -> Any: Parameters ---------- mod_df: "counters" dataframe for the input - module `mod` from a ``darshan.DarshanReport``. + module `mod` from a dictionary with 2 separate DataFrames: 'counters' and 'fcounters'. mod: the module to obtain the common accesses table for (i.e "POSIX", "MPI-IO", "H5D"). 
@@ -102,7 +102,6 @@ def get_access_count_df(mod_df: Any, mod: str) -> Any: df = mod_df.filter(filter_keys) df = collapse_access_cols(df=df, col_name=col_name) df_list.append(df) - return pd.concat(df_list, axis=1) @@ -122,14 +121,14 @@ def __init__(self, df: Any, **kwargs): self.html = self.df.to_html(**kwargs) -def plot_common_access_table(report: darshan.DarshanReport, mod: str, n_rows: int = 4) -> DarshanReportTable: +def plot_common_access_table(record: dict, mod: str, n_rows: int = 4) -> DarshanReportTable: """ Creates a table containing the most common access sizes and their counts. Parameters ---------- - report: a ``darshan.DarshanReport``. + record: a dictionary with 2 separate DataFrames: 'counters' and 'fcounters' mod: the module to obtain the common access size table for (i.e "POSIX", "MPI-IO", "H5D"). @@ -145,8 +144,7 @@ def plot_common_access_table(report: darshan.DarshanReport, mod: str, n_rows: in the `df` or `html` attributes, respectively. """ - mod_df = report.records[mod].to_df(attach=None)["counters"] - + mod_df=record['counters'] if mod == "MPI-IO": mod = "MPIIO" diff --git a/darshan-util/pydarshan/darshan/experimental/plots/plot_opcounts.py b/darshan-util/pydarshan/darshan/experimental/plots/plot_opcounts.py index 1d648c610..6b0843a5e 100644 --- a/darshan-util/pydarshan/darshan/experimental/plots/plot_opcounts.py +++ b/darshan-util/pydarshan/darshan/experimental/plots/plot_opcounts.py @@ -20,34 +20,26 @@ def autolabel(ax, rects): rotation=45, ) -def gather_count_data(report, mod): + +def gather_count_data(record, mod): """ Collect the module counts and labels for the I/O Operation Count plot. """ - # TODO: change to report.summary - if 'agg_ioops' in dir(report): - report.agg_ioops() - else: - print( - "Cannot create summary, agg_ioops aggregator is not " - "registered with the report class. Be sure to call " - "darshan.experimental() once before invoking this plot." 
- ) - - mod_data = report.summary['agg_ioops'][mod] - + if mod in ['H5F', 'H5D', 'PNETCDF_FILE', 'PNETCDF_VAR']: + raise ValueError(f"Error: plot_opcounts not supported for module {mod}") + mod_data = record['counters'] # Gather POSIX if mod == 'POSIX': labels = ['Read', 'Write', 'Open', 'Stat', 'Seek', 'Mmap', 'Fsync'] counts = [ - mod_data['POSIX_READS'], - mod_data['POSIX_WRITES'], - mod_data['POSIX_OPENS'], - mod_data['POSIX_STATS'], - mod_data['POSIX_SEEKS'], + mod_data['POSIX_READS'][0], + mod_data['POSIX_WRITES'][0], + mod_data['POSIX_OPENS'][0], + mod_data['POSIX_STATS'][0], + mod_data['POSIX_SEEKS'][0], 0, # faulty? mod_data['POSIX_MMAPS'], - mod_data['POSIX_FSYNCS'] + mod_data['POSIX_FDSYNCS'] + mod_data['POSIX_FSYNCS'][0] + mod_data['POSIX_FDSYNCS'][0] ] # Gather MPIIO @@ -56,114 +48,114 @@ def gather_count_data(report, mod): 'Ind. Read', 'Ind. Write', 'Ind. Open', 'Col. Read', 'Col. Write', 'Col. Open', 'Sync'] counts = [ - mod_data['MPIIO_INDEP_READS'], - mod_data['MPIIO_INDEP_WRITES'], - mod_data['MPIIO_INDEP_OPENS'], - mod_data['MPIIO_COLL_READS'], - mod_data['MPIIO_COLL_WRITES'], - mod_data['MPIIO_COLL_OPENS'], - mod_data['MPIIO_SYNCS'], + mod_data['MPIIO_INDEP_READS'][0], + mod_data['MPIIO_INDEP_WRITES'][0], + mod_data['MPIIO_INDEP_OPENS'][0], + mod_data['MPIIO_COLL_READS'][0], + mod_data['MPIIO_COLL_WRITES'][0], + mod_data['MPIIO_COLL_OPENS'][0], + mod_data['MPIIO_SYNCS'][0], ] # Gather Stdio elif mod == 'STDIO': labels = ['Read', 'Write', 'Open', 'Seek', 'Flush'] counts = [ - mod_data['STDIO_READS'], - mod_data['STDIO_WRITES'], - mod_data['STDIO_OPENS'], - mod_data['STDIO_SEEKS'], - mod_data['STDIO_FLUSHES'] + mod_data['STDIO_READS'][0], + mod_data['STDIO_WRITES'][0], + mod_data['STDIO_OPENS'][0], + mod_data['STDIO_SEEKS'][0], + mod_data['STDIO_FLUSHES'][0] ] - elif mod == 'H5F': - labels = [ - 'H5D Read', 'H5D Write', 'H5D Open', - 'H5D Flush', 'H5F Open', 'H5F Flush', - ] - counts = [ - # set H5D counters to zero - 0, 0, 0, 0, - 
mod_data['H5F_OPENS'], - mod_data['H5F_FLUSHES'], - ] - - elif mod == 'H5D': - labels = [ - 'H5D Read', 'H5D Write', 'H5D Open', - 'H5D Flush', 'H5F Open', 'H5F Flush', - ] +# elif mod == 'H5F': +# labels = [ +# 'H5D Read', 'H5D Write', 'H5D Open', +# 'H5D Flush', 'H5F Open', 'H5F Flush', +# ] +# counts = [ +# # set H5D counters to zero +# 0, 0, 0, 0, +# mod_data['H5F_OPENS'][0], +# mod_data['H5F_FLUSHES'][0], +# ] + +# elif mod == 'H5D': +# labels = [ +# 'H5D Read', 'H5D Write', 'H5D Open', +# 'H5D Flush', 'H5F Open', 'H5F Flush', +# ] # H5F is not necessarily available following # gh-703 - if not "H5F" in report.summary["agg_ioops"]: - report.summary['agg_ioops']['H5F'] = defaultdict(lambda: 0) - - counts = [ - report.summary['agg_ioops']['H5D']['H5D_READS'], - report.summary['agg_ioops']['H5D']['H5D_WRITES'], - report.summary['agg_ioops']['H5D']['H5D_OPENS'], - report.summary['agg_ioops']['H5D']['H5D_FLUSHES'], - report.summary['agg_ioops']['H5F']['H5F_OPENS'], - report.summary['agg_ioops']['H5F']['H5F_FLUSHES'], - ] - - elif mod == 'PNETCDF_FILE': - labels = [ - 'Var Ind Read', 'Var Ind Write', 'Var Open', - 'Var Coll Read', 'Var Coll Write', - 'Var NB Read', 'Var NB Write', - 'File Open', - 'File Sync', - 'File Ind Waits', - 'File Coll Waits', - ] - counts = [ - # most of the counters will all get set in PNETCDF_VAR - 0, 0, 0, 0, 0, 0, 0, - mod_data["PNETCDF_FILE_OPENS"] + mod_data["PNETCDF_FILE_CREATES"], - mod_data["PNETCDF_FILE_SYNCS"], - mod_data['PNETCDF_FILE_INDEP_WAITS'], - mod_data['PNETCDF_FILE_COLL_WAITS'], - ] - - elif mod == 'PNETCDF_VAR': - labels = [ - 'Var Ind Read', 'Var Ind Write', 'Var Open', - 'Var Coll Read', 'Var Coll Write', - 'Var NB Read', 'Var NB Write', - 'File Open', - 'File Sync', - 'File Ind Waits', - 'File Coll Waits', - ] - counts = [ - report.summary['agg_ioops']['PNETCDF_VAR']['PNETCDF_VAR_INDEP_READS'], - report.summary['agg_ioops']['PNETCDF_VAR']['PNETCDF_VAR_INDEP_WRITES'], - 
report.summary['agg_ioops']['PNETCDF_VAR']['PNETCDF_VAR_OPENS'], - report.summary['agg_ioops']['PNETCDF_VAR']['PNETCDF_VAR_COLL_READS'], - report.summary['agg_ioops']['PNETCDF_VAR']['PNETCDF_VAR_COLL_WRITES'], - report.summary['agg_ioops']['PNETCDF_VAR']['PNETCDF_VAR_NB_READS'], - report.summary['agg_ioops']['PNETCDF_VAR']['PNETCDF_VAR_NB_WRITES'], - # NOTE: should handle cases where only 1/2 PNETCDF mods - # are present? - (report.summary['agg_ioops']['PNETCDF_FILE']['PNETCDF_FILE_OPENS'] + - report.summary['agg_ioops']['PNETCDF_FILE']['PNETCDF_FILE_CREATES'] - ), - report.summary['agg_ioops']['PNETCDF_FILE']['PNETCDF_FILE_SYNCS'], - report.summary['agg_ioops']['PNETCDF_FILE']['PNETCDF_FILE_INDEP_WAITS'], - report.summary['agg_ioops']['PNETCDF_FILE']['PNETCDF_FILE_COLL_WAITS'], - ] +# if not "H5F" in record.summary["agg_ioops"]: +# record.summary['agg_ioops']['H5F'] = defaultdict(lambda: 0) + +# counts = [ +# record.summary['agg_ioops']['H5D']['H5D_READS'], +# record.summary['agg_ioops']['H5D']['H5D_WRITES'], +# record.summary['agg_ioops']['H5D']['H5D_OPENS'], +# record.summary['agg_ioops']['H5D']['H5D_FLUSHES'], +# record.summary['agg_ioops']['H5F']['H5F_OPENS'], +# record.summary['agg_ioops']['H5F']['H5F_FLUSHES'], +# ] + +# elif mod == 'PNETCDF_FILE': +# labels = [ +# 'Var Ind Read', 'Var Ind Write', 'Var Open', +# 'Var Coll Read', 'Var Coll Write', +# 'Var NB Read', 'Var NB Write', +# 'File Open', +# 'File Sync', +# 'File Ind Waits', +# 'File Coll Waits', +# ] +# counts = [ +# # most of the counters will all get set in PNETCDF_VAR +# 0, 0, 0, 0, 0, 0, 0, +# mod_data["PNETCDF_FILE_OPENS"][0] + mod_data["PNETCDF_FILE_CREATES"][0], +# mod_data["PNETCDF_FILE_SYNCS"][0], +# mod_data['PNETCDF_FILE_INDEP_WAITS'][0], +# mod_data['PNETCDF_FILE_COLL_WAITS'][0], +# ] + +# elif mod == 'PNETCDF_VAR': +# labels = [ +# 'Var Ind Read', 'Var Ind Write', 'Var Open', +# 'Var Coll Read', 'Var Coll Write', +# 'Var NB Read', 'Var NB Write', +# 'File Open', +# 'File Sync', +# 'File 
Ind Waits', +# 'File Coll Waits', +# ] +# counts = [ +# record.summary['agg_ioops']['PNETCDF_VAR']['PNETCDF_VAR_INDEP_READS'], +# record.summary['agg_ioops']['PNETCDF_VAR']['PNETCDF_VAR_INDEP_WRITES'], +# record.summary['agg_ioops']['PNETCDF_VAR']['PNETCDF_VAR_OPENS'], +# record.summary['agg_ioops']['PNETCDF_VAR']['PNETCDF_VAR_COLL_READS'], +# record.summary['agg_ioops']['PNETCDF_VAR']['PNETCDF_VAR_COLL_WRITES'], +# record.summary['agg_ioops']['PNETCDF_VAR']['PNETCDF_VAR_NB_READS'], +# record.summary['agg_ioops']['PNETCDF_VAR']['PNETCDF_VAR_NB_WRITES'], +# # NOTE: should handle cases where only 1/2 PNETCDF mods +# # are present? +# (record.summary['agg_ioops']['PNETCDF_FILE']['PNETCDF_FILE_OPENS'] + +# record.summary['agg_ioops']['PNETCDF_FILE']['PNETCDF_FILE_CREATES'] +# ), +# record.summary['agg_ioops']['PNETCDF_FILE']['PNETCDF_FILE_SYNCS'], +# record.summary['agg_ioops']['PNETCDF_FILE']['PNETCDF_FILE_INDEP_WAITS'], +# record.summary['agg_ioops']['PNETCDF_FILE']['PNETCDF_FILE_COLL_WAITS'], +# ] return labels, counts -def plot_opcounts(report, mod, ax=None): +def plot_opcounts(record, mod, ax=None): """ Generates a bar chart summary for operation counts. Parameters ---------- - report (DarshanReport): darshan report object to plot + record: a dictionary with 2 separate DataFrames: 'counters' and 'fcounters' mod: the module to plot operation counts for (i.e. "POSIX", "MPI-IO", "STDIO", "H5F", "H5D"). If "H5D" is input the returned @@ -176,11 +168,10 @@ def plot_opcounts(report, mod, ax=None): else: fig = None - labels, counts = gather_count_data(report=report, mod=mod) + labels, counts = gather_count_data(record=record, mod=mod) x = np.arange(len(labels)) # the label locations rects = ax.bar(x, counts) - # Add some text for labels, title and custom x-axis tick labels, etc.
ax.set_ylabel('Count') ax.set_xticks(x) @@ -190,7 +181,6 @@ def plot_opcounts(report, mod, ax=None): ax.spines[['right', 'top']].set_visible(False) autolabel(ax=ax, rects=rects) - plt.tight_layout() if fig is not None: diff --git a/darshan-util/pydarshan/darshan/tests/test_plot_exp_common.py b/darshan-util/pydarshan/darshan/tests/test_plot_exp_common.py index 8e29828dc..1fa674e13 100644 --- a/darshan-util/pydarshan/darshan/tests/test_plot_exp_common.py +++ b/darshan-util/pydarshan/darshan/tests/test_plot_exp_common.py @@ -6,6 +6,7 @@ import darshan from darshan.experimental.plots import plot_opcounts, plot_access_histogram from darshan.log_utils import get_log_path +from darshan.backend.cffi_backend import accumulate_records darshan.enable_experimental() @@ -143,12 +144,16 @@ ], ) def test_xticks_and_labels(log_path, func, expected_xticklabels, mod): + if mod in ['H5F', 'H5D', 'PNETCDF_FILE', 'PNETCDF_VAR']: + pytest.xfail(reason="module not supported") + # check the x-axis tick mark locations and # labels log_path = get_log_path(log_path) with darshan.DarshanReport(log_path) as report: - - fig = func(report=report, mod=mod) + recs = report.records[mod].to_df() + acc = accumulate_records(recs, mod, report.metadata['job']['nprocs']) + fig = func(record=acc.summary_record, mod=mod) # retrieve the x-axis tick mark locations and labels # from the output figure object @@ -386,6 +391,9 @@ def test_xticks_and_labels(log_path, func, expected_xticklabels, mod): ], ) def test_bar_heights(filename, mod, fig_func, expected_heights): + if mod in ['H5F', 'H5D', 'PNETCDF_FILE', 'PNETCDF_VAR']: + pytest.xfail(reason="module not supported") + # check bar graph heights log_path = get_log_path(filename) with darshan.DarshanReport(log_path) as report: