diff --git a/mlos_analyzer/README.md b/mlos_analyzer/README.md
new file mode 100644
index 0000000000..47ecaffb0e
--- /dev/null
+++ b/mlos_analyzer/README.md
@@ -0,0 +1,45 @@
+# MLOS Analyzer Dashboard
+
+This project provides a comprehensive set of dashboard components for analyzing experiments conducted using MLOS. The components let users visualize experiment results, analyze performance metrics, and run statistical analyses interactively.
+
+The dashboard components can be used within a notebook, in Streamlit, or on any other platform that supports Plotly.
+
+They can also automate statistical significance testing to identify meaningful differences between configuration sets, streamlining performance analysis by automatically detecting which configurations yield compelling improvements.
+
+## Features
+
+1. **Experiment Overview**:
+
+   - View dataset statistics and configuration distributions.
+   - Inspect the overall performance of your experiments.
+
+1. **Performance Analysis**:
+
+   - Visualize metrics with whisker plots and heatmaps.
+   - Perform advanced analysis using parallel coordinates and performance radar plots.
+
+1. **Time Series Analysis**:
+
+   - Analyze metrics over time.
+   - Apply moving-average filters for better trend visualization.
+
+1. **Distribution Analysis**:
+
+   - View metric distributions with histogram and violin plots.
+
+1. **Failure Analysis**:
+
+   - Visualize success/failure distributions.
+   - Analyze failure rates across different configurations.
+
+1. **Statistical Analysis**:
+
+   - Perform pairwise statistical tests for configuration comparison.
+   - Compare score distributions between different configurations.
+
+## Installation
+
+```bash
+pip install -r requirements.txt
+python setup.py install
+```
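+
+## Usage
+
+A minimal sketch of driving the components from a notebook or script. It assumes your storage config already points at an experiment database; `"my_experiment"` and `"result.metric"` are placeholder values:
+
+```python
+from mlos_analyzer.core.storage import storage
+from mlos_analyzer.visualization.plots import plot_whisker_plots
+from mlos_analyzer.visualization.statistical import run_pairwise_stat_tests
+
+# Load the results of one experiment as a DataFrame.
+df = storage.experiments["my_experiment"].results_df
+
+# Plot the top configurations and run pairwise significance tests.
+plot_whisker_plots(df, target_col="result.metric").show()
+print(run_pairwise_stat_tests(df, metric="result.metric", alpha=0.05))
+```
+
+To launch the full dashboard instead, run `streamlit run example_usage.py`.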
diff --git a/mlos_analyzer/example_usage.py b/mlos_analyzer/example_usage.py
new file mode 100644
index 0000000000..e0012c8d64
--- /dev/null
+++ b/mlos_analyzer/example_usage.py
@@ -0,0 +1,144 @@
+#
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+#
+
+# Run as "streamlit run example_usage.py"
+
+import streamlit as st
+from mlos_analyzer.core.storage import storage
+from mlos_analyzer.visualization.correlation import plot_heatmap
+from mlos_analyzer.visualization.distributions import (
+    plot_metric_distribution,
+    plot_violin_comparison,
+)
+from mlos_analyzer.visualization.failure_metrics import (
+    plot_failure_rate_by_config,
+    plot_success_failure_distribution,
+)
+from mlos_analyzer.visualization.performance import (
+    plot_parallel_coordinates,
+    plot_performance_radar,
+)
+from mlos_analyzer.visualization.plots import plot_whisker_plots
+from mlos_analyzer.visualization.statistical import (
+    compare_score_distributions,
+    run_pairwise_stat_tests,
+)
+from mlos_analyzer.visualization.timeseries import (
+    plot_metric_over_time,
+    plot_moving_average,
+)
+
+
+def main():
+    st.set_page_config(page_title="MLOS Analyzer Dashboard", layout="wide")
+    st.title("MLOS Experiment Analysis Dashboard")
+
+    st.sidebar.header("Settings")
+    experiment_ids = list(storage.experiments.keys())
+    selected_experiment = st.sidebar.selectbox("Select Experiment", experiment_ids)
+
+    if selected_experiment:
+        df = storage.experiments[selected_experiment].results_df
+        metrics = [col for col in df.columns if col.startswith("result")]
+
+        tab1, tab2, tab3, tab4, tab5, tab6 = st.tabs(
+            ["Overview", "Performance", "Time Series", "Distributions", "Failures", "Statistics"]
+        )
+
+        with tab1:
+            st.header("Experiment Overview")
+            col1, col2 = st.columns(2)
+            with col1:
+                st.subheader("Dataset Info")
+                st.write(df.describe())
+            with col2:
+                st.subheader("Configuration Distribution")
+                config_counts = df["tunable_config_id"].value_counts()
+                st.bar_chart(config_counts)
+
+        with tab2:
+            st.header("Performance Analysis")
+            selected_metric = st.selectbox("Select Metric", metrics, key="perf_metric")
+
+            col1, col2 = st.columns(2)
+            with col1:
+                fig_whisker = plot_whisker_plots(df, selected_metric)
+                st.plotly_chart(fig_whisker)
+            with col2:
+                fig_heatmap = plot_heatmap(df)
+                st.plotly_chart(fig_heatmap)
+
+            selected_metrics = st.multiselect(
+                "Select Metrics for Advanced Analysis", metrics, default=metrics[:3]
+            )
+            if selected_metrics:
+                col3, col4 = st.columns(2)
+                with col3:
+                    fig = plot_parallel_coordinates(df, selected_metrics)
+                    st.plotly_chart(fig)
+                with col4:
+                    fig = plot_performance_radar(df, selected_metrics)
+                    st.plotly_chart(fig)
+
+        with tab3:
+            st.header("Time Series Analysis")
+            metric = st.selectbox("Select Metric", metrics, key="ts_metric")
+            window = st.slider("Moving Average Window", 2, 20, 5)
+
+            col1, col2 = st.columns(2)
+            with col1:
+                fig = plot_metric_over_time(df, metric)
+                st.plotly_chart(fig)
+            with col2:
+                fig = plot_moving_average(df, metric, window)
+                st.plotly_chart(fig)
+
+        with tab4:
+            st.header("Distribution Analysis")
+            metric = st.selectbox("Select Metric", metrics, key="dist_metric")
+
+            col1, col2 = st.columns(2)
+            with col1:
+                fig = plot_metric_distribution(df, metric)
+                st.plotly_chart(fig)
+            with col2:
+                fig = plot_violin_comparison(df, metric)
+                st.plotly_chart(fig)
+
+        with tab5:
+            st.header("Failure Analysis")
+            col1, col2 = st.columns(2)
+            with col1:
+                fig_dist = plot_success_failure_distribution(df)
+                st.plotly_chart(fig_dist)
+            with col2:
+                fig_rate = plot_failure_rate_by_config(df)
+                st.plotly_chart(fig_rate)
+
+        with tab6:
+            st.header("Statistical Analysis")
+            test_metric = st.selectbox("Select Test Metric", metrics)
+            alpha = st.slider("Significance Level (α)", 0.01, 0.10, 0.05)
+
+            results = run_pairwise_stat_tests(df, test_metric, alpha=alpha)
+            st.dataframe(results)
+
+            st.subheader("Configuration Comparison")
+            config1, config2 = st.columns(2)
+            with config1:
+                cfg1 = st.selectbox("First Configuration", df["tunable_config_id"].unique())
+            with config2:
+                cfg2 = st.selectbox("Second Configuration", df["tunable_config_id"].unique())
+
+            if cfg1 != cfg2:
+                fig_compare = compare_score_distributions(df, test_metric, cfg1, cfg2)
+                st.plotly_chart(fig_compare)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/mlos_analyzer/mlos_analyzer/__init__.py b/mlos_analyzer/mlos_analyzer/__init__.py
new file mode 100644
index 0000000000..8b9f1bdf79
--- /dev/null
+++ b/mlos_analyzer/mlos_analyzer/__init__.py
@@ -0,0 +1,4 @@
+#
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+#
diff --git a/mlos_analyzer/mlos_analyzer/api/__init__.py b/mlos_analyzer/mlos_analyzer/api/__init__.py
new file mode 100644
index 0000000000..8b9f1bdf79
--- /dev/null
+++ b/mlos_analyzer/mlos_analyzer/api/__init__.py
@@ -0,0 +1,4 @@
+#
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+#
diff --git a/mlos_analyzer/mlos_analyzer/api/endpoints.py b/mlos_analyzer/mlos_analyzer/api/endpoints.py
new file mode 100644
index 0000000000..3a24c21a31
--- /dev/null
+++ b/mlos_analyzer/mlos_analyzer/api/endpoints.py
@@ -0,0 +1,37 @@
+#
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+#
+import logging
+
+from fastapi import FastAPI, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
+
+from ..core.storage import storage
+
+app = FastAPI()
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+
+@app.get("/experiments")
+def get_experiments():
+    return list(storage.experiments.keys())
+
+
+@app.get("/experiment_results/{experiment_id}")
+def get_experiment_results(experiment_id: str):
+    try:
+        exp = storage.experiments[experiment_id]
+        return exp.results_df.to_dict(orient="records")
+    except KeyError as e:
+        raise HTTPException(status_code=404, detail="Experiment not found") from e
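+
+
+# To serve this API locally (a sketch; uvicorn is already listed in
+# requirements.txt):
+#
+#   uvicorn mlos_analyzer.api.endpoints:app --reload
+#
+# then, for example:
+#
+#   curl http://localhost:8000/experiments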
diff --git a/mlos_analyzer/mlos_analyzer/api/models.py b/mlos_analyzer/mlos_analyzer/api/models.py
new file mode 100644
index 0000000000..03d836e5fd
--- /dev/null
+++ b/mlos_analyzer/mlos_analyzer/api/models.py
@@ -0,0 +1,9 @@
+#
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+#
+from pydantic import BaseModel
+
+
+class ExperimentExplanationRequest(BaseModel):
+    experiment_id: str
diff --git a/mlos_analyzer/mlos_analyzer/core/__init__.py b/mlos_analyzer/mlos_analyzer/core/__init__.py
new file mode 100644
index 0000000000..8b9f1bdf79
--- /dev/null
+++ b/mlos_analyzer/mlos_analyzer/core/__init__.py
@@ -0,0 +1,4 @@
+#
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+#
diff --git a/mlos_analyzer/mlos_analyzer/core/storage.py b/mlos_analyzer/mlos_analyzer/core/storage.py
new file mode 100644
index 0000000000..df2db7bdd7
--- /dev/null
+++ b/mlos_analyzer/mlos_analyzer/core/storage.py
@@ -0,0 +1,10 @@
+#
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+#
+from mlos_bench.storage import from_config
+
+try:
+    storage = from_config(config="storage/sqlite.jsonc")  # PLACEHOLDER
+except Exception as e:
+    raise RuntimeError(f"Error loading storage configuration: {e}") from e
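+
+# One possible shape for the storage config (an illustrative sketch only; see
+# the mlos_bench storage documentation for the authoritative schema):
+#
+#   {
+#       "class": "mlos_bench.storage.sql.storage.SqlStorage",
+#       "config": {
+#           "drivername": "sqlite",
+#           "database": "mlos_bench.sqlite",
+#           "lazy_schema_create": true
+#       }
+#   }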
diff --git a/mlos_analyzer/mlos_analyzer/utils/__init__.py b/mlos_analyzer/mlos_analyzer/utils/__init__.py
new file mode 100644
index 0000000000..8b9f1bdf79
--- /dev/null
+++ b/mlos_analyzer/mlos_analyzer/utils/__init__.py
@@ -0,0 +1,4 @@
+#
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+#
diff --git a/mlos_analyzer/mlos_analyzer/visualization/__init__.py b/mlos_analyzer/mlos_analyzer/visualization/__init__.py
new file mode 100644
index 0000000000..8b9f1bdf79
--- /dev/null
+++ b/mlos_analyzer/mlos_analyzer/visualization/__init__.py
@@ -0,0 +1,4 @@
+#
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+#
diff --git a/mlos_analyzer/mlos_analyzer/visualization/correlation.py b/mlos_analyzer/mlos_analyzer/visualization/correlation.py
new file mode 100644
index 0000000000..f5864f9692
--- /dev/null
+++ b/mlos_analyzer/mlos_analyzer/visualization/correlation.py
@@ -0,0 +1,32 @@
+#
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+#
+import pandas as pd
+import plotly.express as px
+
+
+def plot_heatmap(df: pd.DataFrame):
+    numeric_df = df.select_dtypes(include="number")
+    config_columns = [col for col in numeric_df.columns if col.startswith("config")]
+    result_columns = [col for col in numeric_df.columns if col.startswith("result")]
+
+    combined_data = numeric_df[config_columns + result_columns]
+    correlation_matrix = combined_data.corr()
+
+    fig = px.imshow(
+        correlation_matrix,
+        title="Configuration vs Results Correlation Heatmap",
+        color_continuous_scale="RdBu",
+    )
+    return fig
+
+
+def plot_correlation_table_target(df: pd.DataFrame, target_col: str):
+    numeric_df = df.select_dtypes(include="number")
+    correlations = numeric_df.corrwith(numeric_df[target_col]).sort_values(ascending=False)
+
+    fig = px.bar(
+        x=correlations.index, y=correlations.values, title=f"Correlations with {target_col}"
+    )
+    return fig
diff --git a/mlos_analyzer/mlos_analyzer/visualization/distributions.py b/mlos_analyzer/mlos_analyzer/visualization/distributions.py
new file mode 100644
index 0000000000..45b5bd7a13
--- /dev/null
+++ b/mlos_analyzer/mlos_analyzer/visualization/distributions.py
@@ -0,0 +1,28 @@
+#
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+#
+import plotly.express as px
+import plotly.figure_factory as ff
+
+
+def plot_metric_distribution(df, metric: str):
+    values = df[metric].dropna()
+    bin_size = (values.max() - values.min()) / 30 or 1  # fall back if the metric is constant
+    fig = ff.create_distplot([values], [metric], bin_size=bin_size)
+    fig.update_layout(title=f"Distribution of {metric}")
+    return fig
+
+
+def plot_violin_comparison(df, metric: str, group_by: str = "tunable_config_id"):
+    fig = px.violin(
+        df,
+        x=group_by,
+        y=metric,
+        box=True,
+        points="all",
+        title=f"{metric} Distribution by {group_by}",
+    )
+    return fig
diff --git a/mlos_analyzer/mlos_analyzer/visualization/failure_metrics.py b/mlos_analyzer/mlos_analyzer/visualization/failure_metrics.py
new file mode 100644
index 0000000000..dcb67121ad
--- /dev/null
+++ b/mlos_analyzer/mlos_analyzer/visualization/failure_metrics.py
@@ -0,0 +1,30 @@
+#
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+#
+import pandas as pd
+import plotly.express as px
+
+
+def plot_success_failure_distribution(df: pd.DataFrame):
+    status_counts = df["status"].value_counts()
+    return px.pie(
+        values=status_counts.values,
+        names=status_counts.index,
+        title="Success/Failure Distribution",
+    )
+
+
+def plot_failure_rate_by_config(df: pd.DataFrame):
+    # Assumes mlos_bench reports failed trials with status == "FAILED".
+    failure_rate = (
+        df.groupby("tunable_config_id")["status"]
+        .apply(lambda x: (x == "FAILED").mean())
+        .reset_index()
+    )
+    failure_rate.columns = ["tunable_config_id", "failure_rate"]
+    return px.bar(
+        failure_rate,
+        x="tunable_config_id",
+        y="failure_rate",
+        title="Failure Rate by Configuration",
+    )
diff --git a/mlos_analyzer/mlos_analyzer/visualization/performance.py b/mlos_analyzer/mlos_analyzer/visualization/performance.py
new file mode 100644
index 0000000000..740a4559eb
--- /dev/null
+++ b/mlos_analyzer/mlos_analyzer/visualization/performance.py
@@ -0,0 +1,88 @@
+#
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+#
+import plotly.express as px
+import plotly.graph_objects as go
+
+
+def plot_parallel_coordinates(df, metrics: list[str]):
+    fig = px.parallel_coordinates(
+        df,
+        dimensions=[col for col in df.columns if col.startswith("config") or col in metrics],
+        title="Parameter and Metric Relationships",
+    )
+    return fig
+
+
+def plot_performance_radar(df, metrics: list[str], top_n: int = 5):
+    # Min-max normalize each metric so they share a common scale on the radar.
+    normalized_df = df.copy()
+    for metric in metrics:
+        normalized_df[metric] = (df[metric] - df[metric].min()) / (
+            df[metric].max() - df[metric].min()
+        )
+
+    # Rank configurations by their mean normalized score across all metrics.
+    top_configs = (
+        normalized_df.groupby("tunable_config_id")[metrics]
+        .mean()
+        .mean(axis=1)
+        .nlargest(top_n)
+        .index
+    )
+
+    fig = go.Figure()
+    for config in top_configs:
+        config_data = normalized_df[normalized_df["tunable_config_id"] == config][metrics].mean()
+        fig.add_trace(
+            go.Scatterpolar(r=config_data.values, theta=metrics, name=f"Config {config}")
+        )
+
+    fig.update_layout(title=f"Top {top_n} Configurations Performance")
+    return fig
diff --git a/mlos_analyzer/mlos_analyzer/visualization/plots.py b/mlos_analyzer/mlos_analyzer/visualization/plots.py
new file mode 100644
index 0000000000..6d964893bf
--- /dev/null
+++ b/mlos_analyzer/mlos_analyzer/visualization/plots.py
@@ -0,0 +1,26 @@
+#
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+#
+import pandas as pd
+import plotly.express as px
+
+
+def plot_whisker_plots(df: pd.DataFrame, target_col: str, n: int = 5):
+    if "tunable_config_id" not in df.columns or target_col not in df.columns:
+        raise ValueError(f"Required columns missing: 'tunable_config_id' and/or '{target_col}'")
+
+    df = df.copy()  # avoid mutating the caller's DataFrame
+    df[target_col] = pd.to_numeric(df[target_col], errors="coerce")
+    df = df.dropna(subset=[target_col])
+
+    # Note: "top" configurations assume higher values of target_col are better.
+    config_avg = df.groupby("tunable_config_id")[target_col].mean().reset_index()
+    top_n_configs = config_avg.nlargest(n, target_col)["tunable_config_id"]
+    top_configs = df[df["tunable_config_id"].isin(top_n_configs)]
+
+    fig = px.box(
+        top_configs,
+        x="tunable_config_id",
+        y=target_col,
+        title=f"Top {n} Configurations by {target_col}",
+    )
+    return fig
diff --git a/mlos_analyzer/mlos_analyzer/visualization/statistical.py b/mlos_analyzer/mlos_analyzer/visualization/statistical.py
new file mode 100644
index 0000000000..13b90c748a
--- /dev/null
+++ b/mlos_analyzer/mlos_analyzer/visualization/statistical.py
@@ -0,0 +1,43 @@
+#
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+#
+import pandas as pd
+import plotly.graph_objects as go
+from scipy import stats
+
+
+def run_pairwise_stat_tests(df: pd.DataFrame, metric: str, alpha: float = 0.05):
+    configs = df["tunable_config_id"].unique()
+    results = []
+
+    for i in range(len(configs)):
+        for j in range(i + 1, len(configs)):
+            group1 = df[df["tunable_config_id"] == configs[i]][metric].dropna()
+            group2 = df[df["tunable_config_id"] == configs[j]][metric].dropna()
+
+            stat, pval = stats.ttest_ind(group1, group2)
+            results.append(
+                {
+                    "Config1": configs[i],
+                    "Config2": configs[j],
+                    "p-value": pval,
+                    "Significant": pval < alpha,
+                }
+            )
+
+    return pd.DataFrame(results)
+
+
+def compare_score_distributions(df: pd.DataFrame, metric: str, config1: str, config2: str):
+    group1 = df[df["tunable_config_id"] == config1][metric]
+    group2 = df[df["tunable_config_id"] == config2][metric]
+
+    fig = go.Figure()
+    fig.add_trace(go.Histogram(x=group1, name=f"Config {config1}", opacity=0.7))
+    fig.add_trace(go.Histogram(x=group2, name=f"Config {config2}", opacity=0.7))
+
+    fig.update_layout(
+        barmode="overlay", title=f"Score Distribution Comparison: Config {config1} vs {config2}"
+    )
+    return fig
diff --git a/mlos_analyzer/mlos_analyzer/visualization/timeseries.py b/mlos_analyzer/mlos_analyzer/visualization/timeseries.py
new file mode 100644
index 0000000000..3ced9e0b1f
--- /dev/null
+++ b/mlos_analyzer/mlos_analyzer/visualization/timeseries.py
@@ -0,0 +1,36 @@
+#
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+#
+from typing import Optional
+
+import plotly.express as px
+import plotly.graph_objects as go
+
+
+def plot_metric_over_time(df, metric: str, configs: Optional[list[str]] = None):
+    if configs:
+        df = df[df["tunable_config_id"].isin(configs)]
+
+    fig = px.line(
+        df,
+        x="ts_start",
+        y=metric,
+        color="tunable_config_id",
+        title=f"{metric} Over Time by Configuration",
+    )
+    return fig
+
+
+def plot_moving_average(df, metric: str, window: int = 5):
+    # Note: the average is taken over all rows in time order, across
+    # configurations; filter df to one config first for per-config smoothing.
+    df = df.sort_values("ts_start")
+    df[f"{metric}_ma"] = df[metric].rolling(window=window).mean()
+
+    fig = go.Figure()
+    fig.add_trace(go.Scatter(x=df["ts_start"], y=df[metric], mode="markers", name="Raw Data"))
+    fig.add_trace(
+        go.Scatter(x=df["ts_start"], y=df[f"{metric}_ma"], name=f"{window}-point Moving Average")
+    )
+    return fig
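+
+
+# Example usage (a sketch; "result.metric" and the config ID are hypothetical):
+#
+#   one_cfg = df[df["tunable_config_id"] == 1]
+#   fig = plot_moving_average(one_cfg, "result.metric", window=10)
+#   fig.show()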
diff --git a/mlos_analyzer/requirements.txt b/mlos_analyzer/requirements.txt
new file mode 100644
index 0000000000..d27962adc7
--- /dev/null
+++ b/mlos_analyzer/requirements.txt
@@ -0,0 +1,11 @@
+fastapi==0.68.0
+pandas==1.3.3
+plotly==5.3.1
+streamlit==1.2.0
+seaborn==0.11.2
+matplotlib==3.4.3
+scikit-learn==0.24.2
+scipy==1.7.1
+watchdog==2.1.6
+uvicorn==0.15.0
+azure-identity==1.7.0
diff --git a/mlos_analyzer/setup.py b/mlos_analyzer/setup.py
new file mode 100644
index 0000000000..2b1165e262
--- /dev/null
+++ b/mlos_analyzer/setup.py
@@ -0,0 +1,24 @@
+#
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+#
+from setuptools import find_packages, setup
+
+setup(
+    name="mlos_analyzer",
+    version="0.1.0",
+    packages=find_packages(),
+    install_requires=[
+        "fastapi",
+        "pandas",
+        "plotly",
+        "streamlit",
+        "seaborn",
+        "matplotlib",
+        "scikit-learn",
+        "scipy",
+        "watchdog",
+        "uvicorn",
+        "azure-identity",
+    ],
+)