Skip to content

Commit b7390d0

Browse files
authored
Initial commit
0 parents  commit b7390d0

File tree

10 files changed

+444
-0
lines changed

10 files changed

+444
-0
lines changed

.github/workflows/deploy.yml

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
name: Deploy Jupyter Book
2+
3+
on:
4+
push:
5+
branches:
6+
- main
7+
env:
8+
# `BASE_URL` determines the path the website is served from, including CSS & JS assets
9+
# You may need to change this to `BASE_URL: ''`
10+
BASE_URL: /${{ github.event.repository.name }}
11+
jobs:
12+
build:
13+
runs-on: ubuntu-latest
14+
15+
permissions:
16+
contents: write
17+
pages: write
18+
pull-requests: write
19+
steps:
20+
- name: Checkout Repository
21+
uses: actions/checkout@v3
22+
with:
23+
submodules: 'true'
24+
25+
- name: Set up Python
26+
uses: actions/setup-python@v4
27+
with:
28+
python-version: "3.10"
29+
30+
- name: Install Dependencies
31+
run: |
32+
pip install -r requirements.txt
33+
34+
- name: Build Jupyter Book
35+
run: |
36+
# Clear potential other myst configurations in external_notebooks
37+
find external_notebooks -type f \( -iname "myst.yaml" -o -iname "myst.yml" \) -exec rm -v {} +
38+
jupyter book build --html
39+
40+
- name: Generate Notebook List
41+
run: |
42+
python .github/workflows/generate_notebook_list.py
43+
cp notebooks.json _build/html/notebooks.json
44+
cp index.html _build/html/index.html
45+
touch _build/html/.nojekyll
46+
47+
- name: Deploy to GitHub Pages
48+
uses: JamesIves/github-pages-deploy-action@v4
49+
with:
50+
folder: _build/html/
51+
branch: gh-pages
52+
clean-exclude: pr-preview
53+
force: false
Lines changed: 259 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,259 @@
1+
import os
2+
import json
3+
import nbformat
4+
import yaml
5+
import subprocess
6+
from urllib.parse import urlparse
7+
import pathlib
8+
import re
9+
import base64
10+
from PIL import Image
11+
from io import BytesIO
12+
13+
# --- Filesystem layout -------------------------------------------------------
# Absolute path of the directory the script is started from; all relative
# paths in the catalog are computed against it.
ROOT_DIR = os.path.abspath(os.curdir)
# Name of the JSON catalog written by the script entry point.
OUTPUT_FILE = "notebooks.json"
# Folder (below ROOT_DIR) that is walked for local notebooks.
NOTEBOOK_DIR = "notebooks"
# Folder (below ROOT_DIR) that is walked for submodule notebooks.
SUBMODULE_ROOT = "external_notebooks"
# Folder names that are excluded while walking the local notebooks.
IGNORE_FOLDERS = [
    "venv",
    ".git",
    ".github",
    "_build",
    "_data",
    "dist",
]

# --- Link generation ---------------------------------------------------------
# Host name used when building the git-pull links.
JHUB_INSTANCE = "workspace.earthcode.eox.at"
# Organisation / repository reported for local notebooks and used as fallback
# when the git remote cannot be parsed.
DEF_ORG = "ESA-EarthCODE"
DEF_REPO = "example-viewer"
21+
22+
def extract_last_image(nb, notebook_rel_path, output_dir="_build/html/build/_assets/previews", target_width=300):
    """Create a preview thumbnail for a notebook and return its site-relative path.

    The last image referenced in a markdown cell (``![alt](path)`` or a
    ``:::{figure} path`` directive) is preferred. If there is none, or it
    cannot be used, the ``image/png`` outputs of the code cells are tried
    from the end of the notebook towards the start.

    Args:
        nb: Notebook object exposing ``cells``; each cell exposes
            ``cell_type``, markdown cells ``source`` and code cells
            ``get("outputs", [])``.
        notebook_rel_path: Notebook path using ``/`` separators; used to
            resolve relative image references and to derive the preview
            file name.
        output_dir: Directory the thumbnail is written to (created if missing).
        target_width: Width of the thumbnail in pixels; the height follows
            the aspect ratio of the source image.

    Returns:
        ``"build/_assets/previews/<name>_preview.png"`` when a thumbnail was
        written, otherwise ``None``. Note that the returned prefix is fixed
        and does not follow a custom ``output_dir``.
    """
    os.makedirs(output_dir, exist_ok=True)
    preview_name = notebook_rel_path.replace("/", "_").replace(".ipynb", "_preview.png")

    # Collect every image reference found in markdown cells, in document order.
    found_images = []
    for cell in nb.cells:
        if cell.cell_type != "markdown":
            continue
        for line in cell.source.splitlines():
            # Markdown image: ![alt](path)
            found_images.extend(re.findall(r'!\[.*?\]\((.*?)\)', line))
            # MyST figure directive: :::{figure} ./image.png
            found_images.extend(re.findall(r':::\{figure\}\s+(.*?)\s*$', line))

    if found_images:
        last_image_rel = found_images[-1].strip()
        notebook_dir = os.path.dirname(notebook_rel_path)
        image_abs_path = os.path.normpath(os.path.join(notebook_dir, last_image_rel))
        print(f"[info] Found image: {image_abs_path}")

        # isfile (not exists): an empty reference resolves to the notebook
        # directory itself, which can never be opened as an image.
        if os.path.isfile(image_abs_path):
            try:
                return _save_preview(image_abs_path, preview_name, output_dir, target_width)
            except Exception as e:
                print(f"[warn] Couldn't load/resize MyST image for {notebook_rel_path}: {e}")

    # No usable markdown image: fall back to the most recent PNG code output.
    for cell in reversed(nb.cells):
        if cell.cell_type != "code":
            continue
        for output in reversed(cell.get("outputs", [])):
            data = output.get("data", {})
            if "image/png" not in data:
                continue
            try:
                # Decoding happens inside the try block so that a malformed
                # payload only skips this output instead of aborting the run.
                image_bytes = base64.b64decode(data["image/png"])
                return _save_preview(BytesIO(image_bytes), preview_name, output_dir, target_width)
            except Exception as e:
                # Keep looking: an earlier output may still be usable.
                print(f"[warn] Failed to process image in {notebook_rel_path}: {e}")
    return None


def _save_preview(source, preview_name, output_dir, target_width):
    """Resize ``source`` (path or file object) to ``target_width`` and save it as ``preview_name``."""
    with Image.open(source) as img:
        # Resize while preserving the aspect ratio; never let the height
        # collapse to zero for extremely wide images.
        w_percent = target_width / float(img.size[0])
        h_size = max(1, int(float(img.size[1]) * w_percent))
        resized = img.resize((target_width, h_size), Image.LANCZOS)
        resized.save(os.path.join(output_dir, preview_name))
    relpath = os.path.join("build/_assets/previews", preview_name)
    return os.path.relpath(relpath, start=".").replace("\\", "/")
91+
92+
def parse_gitmodules():
    """Parse ``.gitmodules`` and map each submodule path to its remote info.

    Returns:
        A dict keyed by the normalised submodule path. Each value is a dict
        with ``org`` and ``repo`` (first two path components of the remote
        URL) and ``url`` (the remote URL without a trailing ``.git`` and with
        ``git@host:`` rewritten to ``https://host/``). The dict is empty when
        ``.gitmodules`` does not exist. Sections lacking ``path`` or ``url``
        and URLs with fewer than two path components are skipped.
    """
    gitmodules_path = os.path.join(ROOT_DIR, ".gitmodules")
    if not os.path.exists(gitmodules_path):
        return {}

    submodules = {}
    current = None  # key/value pairs of the section currently being read

    def _commit(section):
        # Only complete sections are usable; incomplete ones are ignored
        # instead of raising KeyError.
        if section and section.get("path") and section.get("url"):
            submodules[section["path"]] = section["url"]

    with open(gitmodules_path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            # Skip blank lines and git-config comments.
            if not line or line.startswith(("#", ";")):
                continue
            if line.startswith("[submodule"):
                _commit(current)
                current = {}
            elif "=" in line and current is not None:
                key, value = [x.strip() for x in line.split("=", 1)]
                current[key] = value
    _commit(current)

    # Convert to path → { org, repo, url }
    result = {}
    for path, url in submodules.items():
        if url.endswith(".git"):
            url = url[:-4]
        if url.startswith("git@"):
            # git@host:org/repo → https://host/org/repo
            url = url.replace(":", "/").replace("git@", "https://")
        parts = urlparse(url).path.strip("/").split("/")
        if len(parts) >= 2:
            result[os.path.normpath(path)] = {
                "org": parts[0],
                "repo": parts[1],
                "url": url,
            }

    return result
132+
133+
def get_git_remote_info(repo_path):
    """Return org, repo and URL of the ``origin`` remote of ``repo_path``.

    The URL is read with ``git config --get remote.origin.url``; a trailing
    ``.git`` is removed and ``git@host:`` is rewritten to ``https://host/``.

    Args:
        repo_path: Directory passed to ``git -C``.

    Returns:
        ``{"org": ..., "repo": ..., "url": ...}``. When the remote cannot be
        read or its path has fewer than two components, ``org`` and ``repo``
        fall back to ``DEF_ORG`` / ``DEF_REPO``; ``url`` is then whatever was
        retrieved so far, or ``""`` if the git call itself failed.
    """
    # Initialised up front so the fallback return below never hits an
    # unbound local when the git call raises.
    url = ""
    try:
        print(repo_path)
        url = subprocess.check_output(
            ["git", "-C", repo_path, "config", "--get", "remote.origin.url"],
            text=True
        ).strip()
        print(url)
        if url.endswith(".git"):
            url = url[:-4]
        if url.startswith("git@"):
            # git@host:org/repo → https://host/org/repo
            url = url.replace(":", "/").replace("git@", "https://")
        parsed = urlparse(url)
        parts = parsed.path.strip("/").split("/")
        if len(parts) >= 2:
            return {"org": parts[0], "repo": parts[1], "url": url}
    except Exception as e:
        print(f"[warn] Could not get git remote info from {repo_path}: {e}")
    return {"org": DEF_ORG, "repo": DEF_REPO, "url": url}
152+
153+
def extract_frontmatter(notebook_path):
    """Read the YAML front matter from the first markdown cell of a notebook.

    The front matter is the text between the first two ``---`` markers of the
    first cell, provided that cell is a markdown cell starting with ``---``.

    Args:
        notebook_path: Path of the ``.ipynb`` file to read.

    Returns:
        The parsed front matter as a dict. An empty dict is returned when
        there is no front matter, when it does not parse to a mapping (for
        example an empty block or a plain horizontal rule), or when reading
        or parsing fails.
    """
    try:
        nb = nbformat.read(notebook_path, as_version=4)
        if nb.cells and nb.cells[0].cell_type == 'markdown':
            content = nb.cells[0].source
            if content.strip().startswith('---'):
                block = content.split('---')[1]
                meta = yaml.safe_load(block)
                # safe_load yields None for an empty block and may yield a
                # str/list for non-mapping content; callers rely on .get().
                if isinstance(meta, dict):
                    return meta
    except Exception as e:
        print(f"[warn] Failed to extract frontmatter from {notebook_path}: {e}")
    return {}
164+
165+
def myst_url_sanitation(url):
    """Turn a notebook path into the slug used for its page link.

    Reverse-engineered from the MyST URL sanitation: separators are unified
    to ``-``, a few punctuation sequences are dropped, the result is
    lower-cased and the last path segment is cut to 50 characters.
    """
    # Applied strictly in this order: "_-_" has to collapse before the
    # generic "_" rule would turn it into "---".
    substitutions = (
        ("_-_", "-"),
        ("_", "-"),
        (" ", "-"),
        ("..", ""),
        (":", ""),
        ("'", ""),
        ('"', ""),
    )
    slug = url
    for needle, replacement in substitutions:
        slug = slug.replace(needle, replacement)
    slug = slug.lower()

    # Only the final segment is length-limited; parent segments stay intact.
    *parents, leaf = slug.split("/")
    return "/".join([*parents, leaf[:50]])
171+
172+
def extract_title_from_first_header(nb):
    """Return the text of the first level-one markdown heading in ``nb``.

    Markdown cells are scanned in order, line by line; the first line of the
    form ``# Title`` wins and its text is returned stripped. ``None`` is
    returned when no such line exists.
    """
    heading = re.compile(r'^\s*#\s+(.*)')
    markdown_lines = (
        text
        for cell in nb.cells
        if cell.cell_type == "markdown"
        for text in cell.source.splitlines()
    )
    for text in markdown_lines:
        found = heading.match(text)
        if found is not None:
            return found.group(1).strip()
    return None
181+
182+
def collect_notebooks():
    """Build the notebook catalog for local and submodule notebooks.

    Local notebooks are every ``.ipynb`` below ``ROOT_DIR/NOTEBOOK_DIR``,
    skipping directories whose name is listed in ``IGNORE_FOLDERS``.
    Submodule notebooks are every ``.ipynb`` below
    ``ROOT_DIR/SUBMODULE_ROOT/<group>/<repo>``; their org/repo/url come from
    ``.gitmodules``.

    Returns:
        A list of dicts with the keys ``title``, ``description``,
        ``metadata``, ``image``, ``link``, ``org``, ``repo``, ``source``,
        ``path`` and ``gitpuller``. ``org``, ``repo`` and ``gitpuller`` are
        ``None`` for notebooks in a directory that is not registered in
        ``.gitmodules``; ``gitpuller`` is also ``None`` when no remote URL is
        known. Entries are emitted in sorted directory/file order.
    """
    catalog = []
    git_url = get_git_remote_info(ROOT_DIR)["url"]
    submodules = parse_gitmodules()

    # --- Local notebooks
    local_path = os.path.join(ROOT_DIR, NOTEBOOK_DIR)
    for dirpath, dirnames, filenames in os.walk(local_path):
        # Prune by exact directory name. A substring test on the absolute
        # path would skip everything when the checkout path itself happens
        # to contain e.g. "dist".
        dirnames[:] = sorted(d for d in dirnames if d not in IGNORE_FOLDERS)
        for file in sorted(filenames):
            if not file.endswith(".ipynb"):
                continue
            abs_path = os.path.join(dirpath, file)
            rel_path = os.path.relpath(abs_path, ROOT_DIR).replace("\\", "/")
            catalog.append(
                _catalog_entry(abs_path, rel_path, DEF_ORG, DEF_REPO, "local", git_url, rel_path)
            )

    # --- Submodule notebooks
    submodules_root = os.path.join(ROOT_DIR, SUBMODULE_ROOT)
    if not os.path.isdir(submodules_root):
        return catalog

    for group in sorted(os.listdir(submodules_root)):
        group_path = os.path.join(submodules_root, group)
        if not os.path.isdir(group_path):
            continue

        for repo in sorted(os.listdir(group_path)):
            sub_path = os.path.join(group_path, repo)
            if not os.path.isdir(sub_path):
                continue

            sub_rel = os.path.relpath(sub_path, ROOT_DIR)
            # Directories missing from .gitmodules yield no remote info;
            # .get() below keeps that case from raising KeyError.
            git_info = submodules.get(os.path.normpath(sub_rel), {})

            for dirpath, dirnames, filenames in os.walk(sub_path):
                dirnames.sort()
                for file in sorted(filenames):
                    if not file.endswith(".ipynb"):
                        continue
                    abs_path = os.path.join(dirpath, file)
                    rel_path = os.path.relpath(abs_path, ROOT_DIR).replace("\\", "/")
                    # Drop "<SUBMODULE_ROOT>/<group>/" so the path starts at
                    # the repository directory.
                    repo_path = "/".join(rel_path.split("/")[2:])
                    catalog.append(
                        _catalog_entry(
                            abs_path,
                            rel_path,
                            git_info.get("org"),
                            git_info.get("repo"),
                            "submodule",
                            git_info.get("url"),
                            repo_path,
                        )
                    )

    return catalog


def _catalog_entry(abs_path, rel_path, org, repo, source, git_url, pull_path):
    """Build the catalog record for one notebook file."""
    file_stem = os.path.splitext(os.path.basename(abs_path))[0]
    meta = extract_frontmatter(abs_path)
    if not isinstance(meta, dict):
        # Front matter that is empty or not a mapping must not break .get().
        meta = {}
    nb = nbformat.read(abs_path, as_version=4)
    image = meta.get("image") or extract_last_image(nb, rel_path)
    gitpuller = None
    if git_url:
        # TODO: need to extract available branch
        gitpuller = f"https://{JHUB_INSTANCE}/hub/user-redirect/git-pull?repo={git_url}&urlpath=lab/tree/{pull_path}&branch=main"
    return {
        "title": meta.get("title", extract_title_from_first_header(nb) or file_stem.replace("_", " ")),
        "description": meta.get("description", ""),
        "metadata": meta,
        "image": image,
        "link": myst_url_sanitation(rel_path.replace(".ipynb", "")),
        "org": org,
        "repo": repo,
        "source": source,
        "path": rel_path,
        "gitpuller": gitpuller,
    }
254+
255+
if __name__ == "__main__":
    # Script entry point: build the catalog and write it as JSON.
    notebooks = collect_notebooks()
    with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
        # default=str is deliberate: YAML front matter may contain dates,
        # which safe_load turns into datetime objects that json cannot
        # serialise on its own.
        json.dump(notebooks, f, indent=2, default=str)
    print(f"✅ Catalog saved to {OUTPUT_FILE}")

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
_data
2+
_build
3+
notebooks.json

LICENSE

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
MIT License
2+
3+
Copyright (c) 2025 EOxHub Workspaces
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a copy
6+
of this software and associated documentation files (the "Software"), to deal
7+
in the Software without restriction, including without limitation the rights
8+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
copies of the Software, and to permit persons to whom the Software is
10+
furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in all
13+
copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
SOFTWARE.

README.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# Example Viewer Template
2+
3+
This template repository makes it easy to set up an example viewer for JupyterLab notebooks.
4+
External repositories can be added via git submodules to the external_notebooks folder.
5+
The GitHub Action traverses the available notebooks, tries to extract their metadata, and builds them with Jupyter Book (v2, based on MyST).
6+
The built site is then deployed to GitHub Pages.

external_notebooks/.gitkeep

Whitespace-only changes.

0 commit comments

Comments
 (0)