eurodatacube
diff --git a/‎.github/workflows/deploy.yml
Lines changed: 53 additions & 0 deletions b/‎.github/workflows/deploy.yml
Lines changed: 53 additions & 0 deletions
diff --git a/‎.github/workflows/generate_notebook_list.py
Lines changed: 259 additions & 0 deletions b/‎.github/workflows/generate_notebook_list.py
Lines changed: 259 additions & 0 deletions
diff --git a/‎.gitignore
Lines changed: 3 additions & 0 deletions b/‎.gitignore
Lines changed: 3 additions & 0 deletions
diff --git a/‎LICENSE
Lines changed: 21 additions & 0 deletions b/‎LICENSE
Lines changed: 21 additions & 0 deletions
diff --git a/‎README.md
Lines changed: 6 additions & 0 deletions b/‎README.md
Lines changed: 6 additions & 0 deletions
diff --git a/‎external_notebooks/.gitkeep b/‎external_notebooks/.gitkeep
@@ -0,0 +1,53 @@
+# Builds the Jupyter Book site, generates the notebook catalog and publishes
+# the result to the gh-pages branch.
+name: Deploy Jupyter Book
+
+# Runs on every push to the main branch.
+on:
+  push:
+    branches:
+      - main
+env:
+  # `BASE_URL` determines the path the website is served from, including CSS & JS assets.
+  # It defaults to the repository name; you may need to change this to `BASE_URL: ''`.
+  BASE_URL: /${{ github.event.repository.name }}
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    
+    permissions:
+      contents: write
+      pages: write
+      pull-requests: write
+    steps:
+      # Submodules are checked out as well so their notebooks are available to the build.
+      - name: Checkout Repository
+        uses: actions/checkout@v3
+        with:
+          submodules: 'true'
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.10"
+
+      - name: Install Dependencies
+        run: |
+          pip install -r requirements.txt
+
+      - name: Build Jupyter Book
+        run: |
+          # Clear potential other myst configurations in external_notebooks
+          find external_notebooks -type f \( -iname "myst.yaml" -o -iname "myst.yml" \) -exec rm -v {} +
+          jupyter book build --html
+
+      # Writes notebooks.json, then copies it and index.html into the built site.
+      # The empty .nojekyll file tells GitHub Pages to skip Jekyll processing.
+      - name: Generate Notebook List
+        run: |
+          python .github/workflows/generate_notebook_list.py
+          cp notebooks.json _build/html/notebooks.json
+          cp index.html _build/html/index.html
+          touch _build/html/.nojekyll
+
+      # Publishes _build/html/ to the gh-pages branch; the pr-preview folder is
+      # excluded from cleaning so it is not removed by a deployment.
+      - name: Deploy to GitHub Pages
+        uses: JamesIves/github-pages-deploy-action@v4
+        with:
+          folder: _build/html/
+          branch: gh-pages
+          clean-exclude: pr-preview
+          force: false
@@ -0,0 +1,259 @@
+import os
+import json
+import nbformat
+import yaml
+import subprocess
+from urllib.parse import urlparse
+import pathlib
+import re
+import base64
+from PIL import Image
+from io import BytesIO
+
+# Repository root, resolved from the current working directory.
+ROOT_DIR = os.path.abspath(".")
+# File the notebook catalog is written to.
+OUTPUT_FILE = "notebooks.json"
+# Folder below ROOT_DIR that holds the repository's own notebooks.
+NOTEBOOK_DIR = "notebooks"
+# Folder below ROOT_DIR that is scanned as <group>/<repo> directories.
+SUBMODULE_ROOT = "external_notebooks"
+# Host name used when building the git-pull links stored in the catalog.
+JHUB_INSTANCE = "workspace.earthcode.eox.at"
+# Folder names excluded when scanning the local notebook directory.
+IGNORE_FOLDERS = ["venv", ".git", ".github", "_build", "_data", "dist"]
+# Organisation and repository reported for local notebooks; also the fallback
+# values returned when the git remote cannot be determined.
+DEF_ORG = "ESA-EarthCODE"
+DEF_REPO = "example-viewer"
+
+def extract_last_image(nb, notebook_rel_path, output_dir="_build/html/build/_assets/previews", target_width=300):
+    os.makedirs(output_dir, exist_ok=True)
+    found_images = []
+    # Check markdown cells for images
+    for cell in nb.cells:
+        if cell.cell_type == "markdown":
+            lines = cell.source.splitlines()
+            for line in lines:
+                # Match Markdown image: ![alt](path)
+                md_img = re.findall(r'!\[.*?\]\((.*?)\)', line)
+                if md_img:
+                    found_images.extend(md_img)
+                # Match MyST figure directive: :::{figure} ./image.png
+                myst_img = re.findall(r':::\{figure\}\s+(.*?)\s*$', line)
+                if myst_img:
+                    found_images.extend(myst_img)
+
+    if found_images:
+        last_image_rel = found_images[-1].strip()
+        notebook_dir = os.path.dirname(notebook_rel_path)
+        image_abs_path = os.path.normpath(os.path.join(notebook_dir, last_image_rel))
+        print(f"[info] Found image: {image_abs_path}")
+
+        if os.path.exists(image_abs_path):
+            try:
+                with Image.open(image_abs_path) as img:
+                    # Resize while preserving aspect ratio
+                    w_percent = target_width / float(img.size[0])
+                    h_size = int(float(img.size[1]) * w_percent)
+                    img = img.resize((target_width, h_size), Image.LANCZOS)
+
+                    # Save to unique file
+                    image_name = notebook_rel_path.replace("/", "_").replace(".ipynb", "_preview.png")
+                    output_path = os.path.join(output_dir, image_name)
+                    img.save(output_path)
+                    relpath = os.path.join("build/_assets/previews", image_name)
+                    return os.path.relpath(relpath, start=".").replace("\\", "/")
+            except Exception as e:
+                print(f"[warn] Couldn't load/resize MyST image for {notebook_rel_path}: {e}")
+
+    
+    # If no markdown images, check code output
+    for cell in reversed(nb.cells):
+        if cell.cell_type == "code":
+            for output in reversed(cell.get("outputs", [])):
+                data = output.get("data", {})
+                if "image/png" in data:
+                    b64 = data["image/png"]
+                    image_bytes = base64.b64decode(b64)
+
+                    try:
+                        # Load image from bytes
+                        image = Image.open(BytesIO(image_bytes))
+                        # Resize while maintaining aspect ratio
+                        w_percent = target_width / float(image.size[0])
+                        h_size = int(float(image.size[1]) * w_percent)
+                        image = image.resize((target_width, h_size), Image.LANCZOS)
+
+                        # Create a filename based on notebook path
+                        base_name = notebook_rel_path.replace("/", "_").replace(".ipynb", "_preview.png")
+                        image_path = os.path.join(output_dir, base_name)
+                        image.save(image_path)
+                        relpath = os.path.join("build/_assets/previews", base_name)
+
+                        return os.path.relpath(relpath, start=".").replace("\\", "/")
+                    except Exception as e:
+                        print(f"[warn] Failed to process image in {notebook_rel_path}: {e}")
+                        return None
+    return None
+
+def parse_gitmodules():
+    """Parse .gitmodules to map paths to remote info."""
+    gitmodules_path = os.path.join(ROOT_DIR, ".gitmodules")
+    if not os.path.exists(gitmodules_path):
+        return {}
+
+    submodules = {}
+    current = {}
+
+    with open(gitmodules_path, "r") as f:
+        for line in f:
+            line = line.strip()
+            if line.startswith("[submodule"):
+                if current:
+                    submodules[current["path"]] = current["url"]
+                current = {}
+            elif "=" in line:
+                key, value = [x.strip() for x in line.split("=", 1)]
+                current[key] = value
+        if current:
+            submodules[current["path"]] = current["url"]
+
+    # Convert to path → { org, repo }
+    result = {}
+    for path, url in submodules.items():
+        if url.endswith(".git"):
+            url = url[:-4]
+        if url.startswith("git@"):
+            url = url.replace(":", "/").replace("git@", "https://")
+        parsed = urlparse(url)
+        parts = parsed.path.strip("/").split("/")
+        if len(parts) >= 2:
+            norm_path = os.path.normpath(path)
+            result[norm_path] = {
+                "org": parts[0],
+                "repo": parts[1],
+                "url": url
+            }
+
+    return result
+
+def get_git_remote_info(repo_path):
+    try:
+        print(repo_path)
+        url = subprocess.check_output(
+            ["git", "-C", repo_path, "config", "--get", "remote.origin.url"],
+            text=True
+        ).strip()
+        print(url)
+        if url.endswith(".git"):
+            url = url[:-4]
+        if url.startswith("git@"):
+            url = url.replace(":", "/").replace("git@", "https://")
+        parsed = urlparse(url)
+        parts = parsed.path.strip("/").split("/")
+        if len(parts) >= 2:
+            return {"org": parts[0], "repo": parts[1], "url": url}
+    except Exception as e:
+        print(f"[warn] Could not get git remote info from {repo_path}: {e}")
+    return {"org": DEF_ORG, "repo": DEF_REPO, "url": url}
+
+def extract_frontmatter(notebook_path):
+    try:
+        nb = nbformat.read(notebook_path, as_version=4)
+        if nb.cells and nb.cells[0].cell_type == 'markdown':
+            content = nb.cells[0].source
+            if content.strip().startswith('---'):
+                block = content.split('---')[1]
+                return yaml.safe_load(block)
+    except Exception as e:
+        print(f"[warn] Failed to extract frontmatter from {notebook_path}: {e}")
+    return {}
+
+def myst_url_sanitation(url):
+    # reverse engineering the myst url sanitation
+    clean_url = url.replace("_-_","-").replace("_", "-").replace(" ", "-").replace("..", "").replace(":", "").replace("'", "").replace('"', "").lower()
+    parts = clean_url.split("/")
+    cut_url = "/".join(parts[0:-1] + [parts[-1][:50]])
+    return cut_url
+
+def extract_title_from_first_header(nb):
+    for cell in nb.cells:
+        if cell.cell_type == "markdown":
+            lines = cell.source.splitlines()
+            for line in lines:
+                match = re.match(r'^\s*#\s+(.*)', line)
+                if match:
+                    return match.group(1).strip()
+    return None
+
+def collect_notebooks():
+    catalog = []
+    git_url = get_git_remote_info(ROOT_DIR)["url"]
+    submodules = parse_gitmodules()
+
+    # --- Local notebooks
+    local_path = os.path.join(ROOT_DIR, NOTEBOOK_DIR)
+    for dirpath, _, filenames in os.walk(local_path):
+        if any(ignored in dirpath for ignored in IGNORE_FOLDERS):
+            continue
+        for file in filenames:
+            if file.endswith(".ipynb"):
+                abs_path = os.path.join(dirpath, file)
+                rel_path = os.path.relpath(abs_path, ROOT_DIR).replace("\\", "/")
+                meta = extract_frontmatter(abs_path)
+                nb = nbformat.read(abs_path, as_version=4)
+                image = meta.get("image") or extract_last_image(nb, rel_path)
+                # TODO: need to extract available branch
+                catalog.append({
+                    "title": meta.get("title", extract_title_from_first_header(nb) or os.path.splitext(file)[0].replace("_", " ")),
+                    "description": meta.get("description", ""),
+                    "metadata": meta,
+                    "image": image,
+                    "link": myst_url_sanitation(rel_path.replace(".ipynb", "")),
+                    "org": DEF_ORG,
+                    "repo": DEF_REPO,
+                    "source": "local",
+                    "path": rel_path,
+                    "gitpuller": f"https://{JHUB_INSTANCE}/hub/user-redirect/git-pull?repo={git_url}&urlpath=lab/tree/{rel_path}&branch=main",
+                })
+
+    # --- Submodule notebooks
+    submodules_root = os.path.join(ROOT_DIR, SUBMODULE_ROOT)
+    for group in os.listdir(submodules_root):
+        group_path = os.path.join(submodules_root, group)
+        if not os.path.isdir(group_path):
+            continue
+
+        for repo in os.listdir(group_path):
+            sub_path = os.path.join(group_path, repo)
+            if not os.path.isdir(sub_path):
+                continue
+
+            sub_rel = os.path.relpath(sub_path, ROOT_DIR)
+            git_info = submodules.get(os.path.normpath(sub_rel), {"org": None, "repo": None})
+            git_url = git_info["url"]
+
+            for dirpath, _, filenames in os.walk(sub_path):
+                for file in filenames:
+                    if file.endswith(".ipynb"):
+                        abs_path = os.path.join(dirpath, file)
+                        rel_path = os.path.relpath(abs_path, ROOT_DIR).replace("\\", "/")
+                        p = pathlib.Path(rel_path)
+                        repo_path = pathlib.Path(*p.parts[2:])
+                        meta = extract_frontmatter(abs_path)
+                        nb = nbformat.read(abs_path, as_version=4)
+                        image = meta.get("image") or extract_last_image(nb, rel_path)
+                        # TODO: need to extract available branch
+                        catalog.append({
+                            "title": meta.get("title", extract_title_from_first_header(nb) or os.path.splitext(file)[0].replace("_", " ")),
+                            "description": meta.get("description", ""),
+                            "metadata": meta,
+                            "image": image,
+                            "link": myst_url_sanitation(rel_path.replace(".ipynb", "")),
+                            "org": git_info["org"],
+                            "repo": git_info["repo"],
+                            "source": "submodule",
+                            "path": rel_path,
+                            "gitpuller": f"https://{JHUB_INSTANCE}/hub/user-redirect/git-pull?repo={git_url}&urlpath=lab/tree/{repo_path}&branch=main",
+                        })
+
+    return catalog
+
+if __name__ == "__main__":
+    notebooks = collect_notebooks()
+    with open(OUTPUT_FILE, "w") as f:
+        json.dump(notebooks, f, indent=2)
+    print(f"✅ Catalog saved to {OUTPUT_FILE}")
@@ -0,0 +1,3 @@
+_data
+_build
+notebooks.json
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2025 EOxHub Workspaces
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
@@ -0,0 +1,6 @@
+# Example Viewer Template
+
+This template repository makes it easy to set up an example viewer for JupyterLab notebooks.
+External repositories can be added as git submodules in the `external_notebooks` folder.
+The GitHub Action traverses the available notebooks, tries to extract their metadata, and builds them with Jupyter Book (v2, MyST).
+The built site is then deployed to GitHub Pages.
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+_data`
	`2`	`+_build`
	`3`	`+notebooks.json`