diff --git a/Makefile b/Makefile
index 46f0202db..cc9b90255 100644
--- a/Makefile
+++ b/Makefile
@@ -337,7 +337,7 @@ examples-pip-install:
cd examples && python setup.py install
examples-test: examples-pip-install
- cd examples && pytest --no-success-flaky-report --benchmark-disable -n auto --durations=5 . --cov=compiler_gym --cov-report=xml:$(COV_REPORT) $(PYTEST_ARGS)
+ cd examples && pytest --nbmake --no-success-flaky-report --benchmark-disable -n auto --durations=5 . --cov=compiler_gym --cov-report=xml:$(COV_REPORT) $(PYTEST_ARGS)
# Note we export $CI=1 so that the tests always run as if within the CI
# environment. This is to ensure that the reported coverage matches that of
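
The `--nbmake` flag comes from the nbmake pytest plugin, pinned in tests/requirements.txt below; it makes pytest collect the example notebooks themselves and execute them as test cases. A minimal sketch of the same run outside of Make (the --nbmake-timeout value is an assumption here, mirroring the 900-second execution timeout added to the notebooks below):

    import pytest

    # Collect every notebook under examples/ and execute it as a test case.
    # --nbmake-timeout caps per-notebook cell execution time, in seconds.
    raise SystemExit(pytest.main(["--nbmake", "--nbmake-timeout=900", "examples/"]))
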
diff --git a/examples/getting-started.ipynb b/examples/getting-started.ipynb
index ae2920f05..8864adce7 100644
--- a/examples/getting-started.ipynb
+++ b/examples/getting-started.ipynb
@@ -27,6 +27,10 @@
"id": "AidRbcu8Pwxh"
},
"source": [
+ "\n",
+ "
\n",
+ "\n",
+ "\n",
"# CompilerGym Getting Started\n",
"\n",
"CompilerGym is a toolkit for applying reinforcement learning to compiler optimization tasks. This document provides a short walkthrough of the key concepts, using the codesize reduction task of a production-grade compiler as an example. It will take about 20 minutes to work through. Lets get started!"
@@ -535,10 +539,13 @@
"name": "CompilerGym Getting Started.ipynb",
"provenance": []
},
+ "execution": {
+ "timeout": 900
+ },
"kernelspec": {
- "display_name": "Python (compiler_gym)",
+ "display_name": "Python 3 (ipykernel)",
"language": "python",
- "name": "compiler_gym"
+ "name": "python3"
},
"language_info": {
"codemirror_mode": {
@@ -550,7 +557,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.8.8"
+ "version": "3.8.11"
}
},
"nbformat": 4,
diff --git a/examples/requirements.txt b/examples/requirements.txt
index 820ca2a4e..70543e830 100644
--- a/examples/requirements.txt
+++ b/examples/requirements.txt
@@ -3,12 +3,11 @@ dgl==0.6.1
geneticalgorithm>=1.0.2
hydra-core==1.1.0
keras==2.6.0
-matplotlib>=3.3.0
+matplotlib>=3.5.0
nevergrad>=0.4.3
-numpy~=1.19.2 # Pin version for tensorflow.
opentuner>=0.8.5
pandas>=1.1.5
-ray[default,rllib]==1.8.0
+ray[default,rllib]==1.9.0
submitit>=1.2.0
submitit>=1.2.0
tensorflow==2.6.1
diff --git a/examples/rllib.ipynb b/examples/rllib.ipynb
index 6a24977f8..b512b78f8 100644
--- a/examples/rllib.ipynb
+++ b/examples/rllib.ipynb
@@ -22,6 +22,10 @@
"id": "gsrdt9HooN9K"
},
"source": [
+ "\n",
+ "
\n",
+ "\n",
+ "\n",
"# Using CompilerGym environments with RLlib\n",
"\n",
"In this notebook we will use [RLlib](https://docs.ray.io/en/master/rllib.html) to train an agent for CompilerGym's [LLVM environment](https://facebookresearch.github.io/CompilerGym/llvm/index.html). RLlib is a popular library for scalable reinforcement learning, built on [Ray](https://docs.ray.io/en/master/index.html). It provides distributed implementations of several standard reinforcement learning algorithms.\n",
@@ -178,17 +182,17 @@
"from itertools import islice\n",
"\n",
"with make_env() as env:\n",
- " # The two datasets we will be using:\n",
- " npb = env.datasets[\"npb-v0\"]\n",
- " chstone = env.datasets[\"chstone-v0\"]\n",
+ " # The two datasets we will be using:\n",
+ " npb = env.datasets[\"npb-v0\"]\n",
+ " chstone = env.datasets[\"chstone-v0\"]\n",
"\n",
- " # Each dataset has a `benchmarks()` method that returns an iterator over the\n",
- " # benchmarks within the dataset. Here we will use iterator sliceing to grab a \n",
- " # handful of benchmarks for training and validation.\n",
- " train_benchmarks = list(islice(npb.benchmarks(), 55))\n",
- " train_benchmarks, val_benchmarks = train_benchmarks[:50], train_benchmarks[50:]\n",
- " # We will use the entire chstone-v0 dataset for testing.\n",
- " test_benchmarks = list(chstone.benchmarks())\n",
+ " # Each dataset has a `benchmarks()` method that returns an iterator over the\n",
+ " # benchmarks within the dataset. Here we will use iterator sliceing to grab a \n",
+ " # handful of benchmarks for training and validation.\n",
+ " train_benchmarks = list(islice(npb.benchmarks(), 55))\n",
+ " train_benchmarks, val_benchmarks = train_benchmarks[:50], train_benchmarks[50:]\n",
+ " # We will use the entire chstone-v0 dataset for testing.\n",
+ " test_benchmarks = list(chstone.benchmarks())\n",
"\n",
"print(\"Number of benchmarks for training:\", len(train_benchmarks))\n",
"print(\"Number of benchmarks for validation:\", len(val_benchmarks))\n",
@@ -217,11 +221,11 @@
"from compiler_gym.wrappers import CycleOverBenchmarks\n",
"\n",
"def make_training_env(*args) -> compiler_gym.envs.CompilerEnv:\n",
- " \"\"\"Make a reinforcement learning environment that cycles over the\n",
- " set of training benchmarks in use.\n",
- " \"\"\"\n",
- " del args # Unused env_config argument passed by ray\n",
- " return CycleOverBenchmarks(make_env(), train_benchmarks)\n",
+ " \"\"\"Make a reinforcement learning environment that cycles over the\n",
+ " set of training benchmarks in use.\n",
+ " \"\"\"\n",
+ " del args # Unused env_config argument passed by ray\n",
+ " return CycleOverBenchmarks(make_env(), train_benchmarks)\n",
"\n",
"tune.register_env(\"compiler_gym\", make_training_env)"
]
@@ -241,12 +245,12 @@
"# Lets cycle through a few calls to reset() to demonstrate that this environment\n",
"# selects a new benchmark for each episode.\n",
"with make_training_env() as env:\n",
- " env.reset()\n",
- " print(env.benchmark)\n",
- " env.reset()\n",
- " print(env.benchmark)\n",
- " env.reset()\n",
- " print(env.benchmark)"
+ " env.reset()\n",
+ " print(env.benchmark)\n",
+ " env.reset()\n",
+ " print(env.benchmark)\n",
+ " env.reset()\n",
+ " print(env.benchmark)"
]
},
{
@@ -278,7 +282,7 @@
"\n",
"# (Re)Start the ray runtime.\n",
"if ray.is_initialized():\n",
- " ray.shutdown()\n",
+ " ray.shutdown()\n",
"ray.init(include_dashboard=False, ignore_reinit_error=True)\n",
"\n",
"tune.register_env(\"compiler_gym\", make_training_env)\n",
@@ -366,18 +370,18 @@
"# performance on a set of benchmarks.\n",
"\n",
"def run_agent_on_benchmarks(benchmarks):\n",
- " \"\"\"Run agent on a list of benchmarks and return a list of cumulative rewards.\"\"\"\n",
- " with make_env() as env:\n",
+ " \"\"\"Run agent on a list of benchmarks and return a list of cumulative rewards.\"\"\"\n",
" rewards = []\n",
- " for i, benchmark in enumerate(benchmarks, start=1):\n",
- " observation, done = env.reset(benchmark=benchmark), False\n",
- " while not done:\n",
- " action = agent.compute_action(observation)\n",
- " observation, _, done, _ = env.step(action)\n",
- " rewards.append(env.episode_reward)\n",
- " print(f\"[{i}/{len(benchmarks)}] {env.state}\")\n",
+ " with make_env() as env:\n",
+ " for i, benchmark in enumerate(benchmarks, start=1):\n",
+ " observation, done = env.reset(benchmark=benchmark), False\n",
+ " while not done:\n",
+ " action = agent.compute_action(observation)\n",
+ " observation, _, done, _ = env.step(action)\n",
+ " rewards.append(env.episode_reward)\n",
+ " print(f\"[{i}/{len(benchmarks)}] {env.state}\")\n",
"\n",
- " return rewards\n",
+ " return rewards\n",
"\n",
"# Evaluate agent performance on the validation set.\n",
"val_rewards = run_agent_on_benchmarks(val_benchmarks)"
@@ -413,14 +417,15 @@
"outputs": [],
"source": [
"# Finally lets plot our results to see how we did!\n",
+ "%matplotlib inline\n",
"from matplotlib import pyplot as plt\n",
"\n",
"def plot_results(x, y, name, ax):\n",
- " plt.sca(ax)\n",
- " plt.bar(range(len(y)), y)\n",
- " plt.ylabel(\"Reward (higher is better)\")\n",
- " plt.xticks(range(len(x)), x, rotation = 90)\n",
- " plt.title(f\"Performance on {name} set\")\n",
+ " plt.sca(ax)\n",
+ " plt.bar(range(len(y)), y)\n",
+ " plt.ylabel(\"Reward (higher is better)\")\n",
+ " plt.xticks(range(len(x)), x, rotation = 90)\n",
+ " plt.title(f\"Performance on {name} set\")\n",
"\n",
"fig, (ax1, ax2) = plt.subplots(1, 2)\n",
"fig.set_size_inches(13, 3)\n",
@@ -446,14 +451,27 @@
"provenance": [],
"toc_visible": true
},
+ "execution": {
+ "timeout": 900
+ },
"kernelspec": {
- "display_name": "Python 3",
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
"name": "python3"
},
"language_info": {
- "name": "python"
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.11"
}
},
"nbformat": 4,
- "nbformat_minor": 0
+ "nbformat_minor": 1
}
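
The notebook body changes are mostly re-indentation, plus an explicit `%matplotlib inline` so the plotting cell renders reliably under a non-interactive executor. For reference, roughly what nbmake does with the metadata above, sketched directly against nbclient (kernel name and timeout taken from this diff):

    import nbformat
    from nbclient import NotebookClient

    # Execute the notebook top to bottom using the python3 kernel and the
    # 900-second timeout named in its metadata.
    nb = nbformat.read("examples/rllib.ipynb", as_version=4)
    NotebookClient(nb, timeout=900, kernel_name="python3").execute()
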
diff --git a/tests/requirements.txt b/tests/requirements.txt
index a93d91ec0..ec1de6175 100644
--- a/tests/requirements.txt
+++ b/tests/requirements.txt
@@ -1,4 +1,5 @@
flaky==3.7.0
+nbmake==0.10
psutil==5.8.0 # Implicit dependency of pytest-xdist
pytest==6.2.5
pytest-benchmark==3.4.1