From d1927404e774e7747f63c218f973184989023c63 Mon Sep 17 00:00:00 2001 From: ayush chaurasia Date: Mon, 28 Jul 2025 16:37:26 +0530 Subject: [PATCH] add notebook --- .../workflow.ipynb | 1157 +++++++++++++++++ 1 file changed, 1157 insertions(+) create mode 100644 docs/src/examples/geneva/ecommerce-multi-index-routing/workflow.ipynb diff --git a/docs/src/examples/geneva/ecommerce-multi-index-routing/workflow.ipynb b/docs/src/examples/geneva/ecommerce-multi-index-routing/workflow.ipynb new file mode 100644 index 0000000..588375b --- /dev/null +++ b/docs/src/examples/geneva/ecommerce-multi-index-routing/workflow.ipynb @@ -0,0 +1,1157 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 42, + "id": "ecfe1790-ecf0-44df-928e-c31173b0ac5a", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/conda/lib/python3.10/site-packages/lancedb/__init__.py:238: UserWarning: lance is not fork-safe. If you are using multiprocessing, use spawn instead.\n", + " warnings.warn(\n", + "/opt/conda/lib/python3.10/site-packages/lance/__init__.py:168: UserWarning: lance is not fork-safe. If you are using multiprocessing, use spawn instead.\n", + " warnings.warn(\n", + "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. 
Disabling parallelism to avoid deadlocks...\n", + "To disable this warning, you can either:\n", + "\t- Avoid using `tokenizers` before the fork if possible\n", + "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.3.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m25.1.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n" + ] + } + ], + "source": [ + "!pip install --upgrade geneva lancedb google-genai kubernetes \"ray[default]\" rerankers -q" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "id": "537f53ab-39cf-459c-9ff2-f51eaad95286", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " % Total % Received % Xferd Average Speed Time Time Time Current\n", + " Dload Upload Total Spent Left Speed\n", + " 0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. 
Disabling parallelism to avoid deadlocks...\n", + "To disable this warning, you can either:\n", + "\t- Avoid using `tokenizers` before the fork if possible\n", + "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " 0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0\n", + "100 23.0G 100 23.0G 0 0 221M 0 0:01:46 0:01:46 --:--:-- 209M\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", + "To disable this warning, you can either:\n", + "\t- Avoid using `tokenizers` before the fork if possible\n", + "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n" + ] + } + ], + "source": [ + "#!/bin/bash\n", + "!curl -L -o fashion-product-images-dataset.zip\\\n", + " https://www.kaggle.com/api/v1/datasets/download/paramaggarwal/fashion-product-images-dataset\n", + "\n", + "!unzip -q fashion-product-images-dataset.zip \n", + "#### TOY DATASET ###3\n", + "#!/bin/bash\n", + "#!curl -L -o fashion-product-images-small.zip\\\n", + "# https://www.kaggle.com/api/v1/datasets/download/paramaggarwal/fashion-product-images-small\n", + "# !unzip -q fashion-product-images-small.zip -d fashion-dataset/" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "73e34039-92b8-4a80-b8c1-a2316e2c0da6", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "!sudo rm -r db" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "8dc98a95-87bd-4519-b5ea-518e139a08dc", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import time\n", + "t1 = time.time()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8d154607-c42c-4a20-bbd0-586fdcb0a7ac", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import os\n", 
+ "import io\n", + "import geneva\n", + "import lancedb\n", + "import concurrent.futures\n", + "\n", + "\n", + "import pandas as pd\n", + "import geneva as gv\n", + "import pyarrow as pa\n", + "\n", + "from pathlib import Path\n", + "from PIL import Image\n", + "from google import genai\n", + "\n", + "\n", + "import torch\n", + "from transformers import CLIPProcessor, CLIPModel\n", + "\n", + "# === CONFIG ===\n", + "os.environ[\"API_KEY\"] = os.getenv(\"GEMINI_API_KEY\") or \"...\"\n", + "\n", + "\n", + "IMG_DIR = Path(\"fashion-dataset/images\")\n", + "STYLE_CSV = Path(\"fashion-dataset/styles.csv\")\n", + "DB_PATH = \"./db\"\n", + "TABLE_NAME = \"products\"\n", + "INSERT_FRAG_SIZE = 10000" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "539b21bf-219d-4adf-b697-1c5b706fc04b", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "44417\n" + ] + }, + { + "data": { + "text/plain": [ + "44412" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.read_csv(STYLE_CSV, on_bad_lines='skip')\n", + "df = df.dropna(subset=[\"id\", \"productDisplayName\"]) \n", + "df = df.drop_duplicates(subset=[\"id\"], keep=\"first\") \n", + "df = df.dropna(subset=[\"id\", \"productDisplayName\"]) # set to 100 for testing\n", + "print(len(df))\n", + "def generate_rows(df, img_dir):\n", + " for _, row in df.iterrows():\n", + " img_path = img_dir / f\"{row['id']}.jpg\"\n", + " if not img_path.exists():\n", + " continue\n", + " with open(img_path, \"rb\") as f:\n", + " yield {\n", + " \"id\": int(row[\"id\"]),\n", + " \"description\": row[\"productDisplayName\"],\n", + " \"image_bytes\": f.read()\n", + " }\n", + "\n", + "db = lancedb.connect(DB_PATH)\n", + "if TABLE_NAME in db.table_names():\n", + " db.drop_table(TABLE_NAME)\n", + " \n", + "data_stream = generate_rows(df, IMG_DIR)\n", + "table = None\n", + "\n", + "rows = []\n", + "for row in 
data_stream:\n", + " rows.append(row)\n", + " if len(rows) == INSERT_FRAG_SIZE:\n", + " if table:\n", + " table.add(rows)\n", + " else:\n", + " table = db.create_table(TABLE_NAME, data=rows)\n", + " rows = []\n", + "if rows:\n", + " table.add(rows)\n", + " \n", + "len(table)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e1a4850d-1cc4-4256-8881-1f9be541902c", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "\n", + "# === FEATURE ENGINEERING with Geneva UDFs ===\n", + "table = gv.connect(DB_PATH).open_table(TABLE_NAME)\n", + "\n", + "# Simple tag extractors\n", + "@gv.udf\n", + "def color_tags(description: str)-> str:\n", + " colors = [\"black\", \"white\", \"red\", \"blue\", \"green\", \"yellow\", \"pink\", \"brown\"]\n", + " return \" , \".join([c for c in colors if c in description.lower()])\n", + "\n", + "\n", + "\n", + "@gv.udf(data_type=pa.string())\n", + "def occasion_tagger(batch: pa.RecordBatch) -> pa.Array:\n", + " _gemini = genai.Client(api_key=\"...\")\n", + " descriptions = batch.column(\"description\").to_pylist()\n", + "\n", + " def call(desc: str) -> str:\n", + " prompt = (\n", + " f\"Based on the following product description, describe the most suitable \"\n", + " f\"occasion(s) to wear this dress in ≤25 words:\\n\\n{desc}\"\n", + " )\n", + " resp = _gemini.models.generate_content(\n", + " model=\"gemini-2.5-flash-lite\",\n", + " contents=prompt,\n", + " config={\"temperature\": 0.0},\n", + " )\n", + " return resp.text.strip() if resp.text else None\n", + "\n", + " with concurrent.futures.ThreadPoolExecutor(max_workers=80) as exec:\n", + " occasions = list(exec.map(call, descriptions))\n", + "\n", + " return pa.array(occasions, type=pa.string())\n", + "\n", + " \n", + "\n", + "@gv.udf(data_type=pa.string())\n", + "def summarizer(batch: pa.RecordBatch) -> pa.Array:\n", + " _gemini = genai.Client(api_key=\"...\")\n", + " descriptions = batch.column(\"description\").to_pylist()\n", + " \n", + " def 
call(desc: str) -> str:\n", + " resp = _gemini.models.generate_content(\n", + " model=\"gemini-2.5-flash-lite\",\n", + " contents=f\"Summarize in ≤25 words:\\n{desc}\",\n", + " config={\"temperature\": 0.0},\n", + " )\n", + " return resp.text.strip() if resp.text else None\n", + " \n", + " with concurrent.futures.ThreadPoolExecutor(max_workers=80) as exec:\n", + " summaries = list(exec.map(call, descriptions))\n", + " \n", + " return pa.array(summaries, type=pa.string())" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "182c6074-6a82-4e51-8ce3-92c34f775c4d", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "table.add_columns({\n", + " \"color_tags\": color_tags,\n", + " \"occasion\": occasion_tagger,\n", + " \"summary\": summarizer\n", + "})" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "57c464c1-4694-4a7d-a052-d6171b6e804f", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "db = geneva.connect(DB_PATH)\n", + "if TABLE_NAME in db.table_names():\n", + " table = db[TABLE_NAME]" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "714f496f-b3ae-4c82-8c27-b30f68416afd", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "id: int64\n", + "description: string\n", + "image_bytes: binary\n", + "color_tags: string\n", + " -- field metadata --\n", + " virtual_column.platform.python_version: '3.10.18'\n", + " virtual_column.platform.system: 'Linux'\n", + " virtual_column: 'true'\n", + " virtual_column.udf: '_udfs/682cde9a5fae61d4a646676e925c720ccf8752b2aa6a' + 20\n", + " virtual_column.udf_inputs: '[\"description\"]'\n", + " virtual_column.platform.arch: 'x86_64'\n", + " virtual_column.udf_name: 'color_tags'\n", + " virtual_column.udf_backend: 'DockerUDFSpecV1'\n", + "occasion: string\n", + " -- field metadata --\n", + " virtual_column: 'true'\n", + " virtual_column.platform.system: 'Linux'\n", + " virtual_column.udf_backend: 'DockerUDFSpecV1'\n", + 
" virtual_column.udf: '_udfs/78df167965d047a24e1d56bdb4860a4db5559020d570' + 20\n", + " virtual_column.platform.arch: 'x86_64'\n", + " virtual_column.platform.python_version: '3.10.18'\n", + " virtual_column.udf_inputs: 'null'\n", + " virtual_column.udf_name: 'occasion_tagger'\n", + "summary: string\n", + " -- field metadata --\n", + " virtual_column.udf: '_udfs/4b4610b4dcc9cf7b8a8f8176427e26940a6d0ae5bf18' + 20\n", + " virtual_column.udf_inputs: 'null'\n", + " virtual_column: 'true'\n", + " virtual_column.udf_name: 'summarizer'\n", + " virtual_column.udf_backend: 'DockerUDFSpecV1'\n", + " virtual_column.platform.system: 'Linux'\n", + " virtual_column.platform.arch: 'x86_64'\n", + " virtual_column.platform.python_version: '3.10.18'" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "table.schema" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "ca9b5af9-faba-497a-8e76-878ee7246711", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import math\n", + "\n", + "CONCURRENCY = 9 # REDUCE to 4 ON COLAB \n", + "\n", + "BATCH_SIZE = 5000" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "289fe85e-e7f9-4502-aec8-33e2fdaff70f", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "e413bb3ef72144caac523cc20fd76a7b", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Cluster nodes provisioned: | 0 [00:00]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "geneva compute context not ready\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "0dac32f4a9d8474b91facd32306effc3", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Workers scheduled: | 0 [00:00]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": 
"stderr", + "output_type": "stream", + "text": [ + "\u001b[90m[\u001b[0m2025-07-28T10:32:32Z \u001b[33mWARN \u001b[0m lance::dataset::write::insert\u001b[90m]\u001b[0m No existing dataset at /home/jupyter/semantic_router/db/geneva_jobs.lance, it will be created\n", + "/opt/conda/lib/python3.10/site-packages/lancedb/__init__.py:238: UserWarning: lance is not fork-safe. If you are using multiprocessing, use spawn instead.\n", + " warnings.warn(\n", + "/opt/conda/lib/python3.10/site-packages/lance/__init__.py:168: UserWarning: lance is not fork-safe. If you are using multiprocessing, use spawn instead.\n", + " warnings.warn(\n", + "/opt/conda/lib/python3.10/site-packages/lancedb/__init__.py:238: UserWarning: lance is not fork-safe. If you are using multiprocessing, use spawn instead.\n", + " warnings.warn(\n", + "/opt/conda/lib/python3.10/site-packages/lance/__init__.py:168: UserWarning: lance is not fork-safe. If you are using multiprocessing, use spawn instead.\n", + " warnings.warn(\n", + "2025-07-28 10:32:37,770\tINFO worker.py:1918 -- Started a local Ray instance. 
View the dashboard at \u001b[1m\u001b[32m127.0.0.1:8265 \u001b[39m\u001b[22m\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "1d6fda12e4144ca98909b71da3fa542f", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Workers started: 0it [00:00, ?it/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "46bf2402d2214c718815b35a08945471", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Batches checkpointed: 0it [00:00, ?it/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "e950d38de47c4f3dbc8a69ba46860e98", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Fragments written: 0it [00:00, ?it/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[36m(run_ray_add_column_remote pid=2287045)\u001b[0m \u001b[90m[\u001b[0m2025-07-28T10:32:42Z \u001b[33mWARN \u001b[0m lance::dataset::transaction\u001b[90m]\u001b[0m Building manifest with DataReplacement operation. This operation is not stable yet, please use with caution.\n", + "\u001b[36m(run_ray_add_column_remote pid=2287045)\u001b[0m \u001b[90m[\u001b[0m2025-07-28T10:32:42Z \u001b[33mWARN \u001b[0m lance::dataset::transaction\u001b[90m]\u001b[0m Building manifest with DataReplacement operation. This operation is not stable yet, please use with caution.\n", + "\u001b[36m(run_ray_add_column_remote pid=2287045)\u001b[0m \u001b[90m[\u001b[0m2025-07-28T10:32:42Z \u001b[33mWARN \u001b[0m lance::dataset::transaction\u001b[90m]\u001b[0m Building manifest with DataReplacement operation. 
This operation is not stable yet, please use with caution.\n", + "\u001b[36m(run_ray_add_column_remote pid=2287045)\u001b[0m \u001b[90m[\u001b[0m2025-07-28T10:32:43Z \u001b[33mWARN \u001b[0m lance::dataset::transaction\u001b[90m]\u001b[0m Building manifest with DataReplacement operation. This operation is not stable yet, please use with caution.\n", + "\u001b[36m(run_ray_add_column_remote pid=2287045)\u001b[0m \u001b[90m[\u001b[0m2025-07-28T10:32:43Z \u001b[33mWARN \u001b[0m lance::dataset::transaction\u001b[90m]\u001b[0m Building manifest with DataReplacement operation. This operation is not stable yet, please use with caution.\n" + ] + }, + { + "data": { + "text/plain": [ + "'e00a3a97-7b06-4cd4-9137-819ad0fd8cc1'" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[36m(run_ray_add_column_remote pid=2287045)\u001b[0m \u001b[90m[\u001b[0m2025-07-28T10:38:04Z \u001b[33mWARN \u001b[0m lance::dataset::transaction\u001b[90m]\u001b[0m Building manifest with DataReplacement operation. This operation is not stable yet, please use with caution.\n", + "\u001b[36m(run_ray_add_column_remote pid=2287045)\u001b[0m \u001b[90m[\u001b[0m2025-07-28T10:38:04Z \u001b[33mWARN \u001b[0m lance::dataset::transaction\u001b[90m]\u001b[0m Building manifest with DataReplacement operation. This operation is not stable yet, please use with caution.\n", + "\u001b[36m(run_ray_add_column_remote pid=2287045)\u001b[0m \u001b[90m[\u001b[0m2025-07-28T10:38:04Z \u001b[33mWARN \u001b[0m lance::dataset::transaction\u001b[90m]\u001b[0m Building manifest with DataReplacement operation. This operation is not stable yet, please use with caution.\n", + "\u001b[36m(run_ray_add_column_remote pid=2287045)\u001b[0m \u001b[90m[\u001b[0m2025-07-28T10:38:04Z \u001b[33mWARN \u001b[0m lance::dataset::transaction\u001b[90m]\u001b[0m Building manifest with DataReplacement operation. 
This operation is not stable yet, please use with caution.\n", + "\u001b[36m(run_ray_add_column_remote pid=2287045)\u001b[0m \u001b[90m[\u001b[0m2025-07-28T10:38:04Z \u001b[33mWARN \u001b[0m lance::dataset::transaction\u001b[90m]\u001b[0m Building manifest with DataReplacement operation. This operation is not stable yet, please use with caution.\n" + ] + } + ], + "source": [ + "table.backfill(\"color_tags\", batch_size=BATCH_SIZE, concurrency=CONCURRENCY)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "2d04933e-682c-4a91-89fc-490ac2114833", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "d4de7d7703744ada8f482645b9381ae0", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Cluster nodes provisioned: | 0 [00:00]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "679fd68968fa424c9cbb852e33a44863", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Workers scheduled: | 0 [00:00]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "0f3a4b045f2443e98f6a839701b965ff", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Workers started: 0it [00:00, ?it/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "237ff4aa04984b63a7959def7af45528", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Batches checkpointed: 0it [00:00, ?it/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "7fa4c3760764452c928cc08f0a9edc5a", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Fragments written: 0%| | 0/5 [00:00 1\u001b[0m 
\u001b[43mtable\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbackfill\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43msummary\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbatch_size\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mBATCH_SIZE\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconcurrency\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mCONCURRENCY\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mwhere\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m1=1\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/geneva/table.py:380\u001b[0m, in \u001b[0;36mTable.backfill\u001b[0;34m(self, col_name, input_columns, udf, where, concurrency, intra_applier_concurrency, refresh_status_secs, **kwargs)\u001b[0m\n\u001b[1;32m 369\u001b[0m \u001b[38;5;66;03m# Kick off the job\u001b[39;00m\n\u001b[1;32m 370\u001b[0m fut \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mbackfill_async(\n\u001b[1;32m 371\u001b[0m col_name,\n\u001b[1;32m 372\u001b[0m input_columns\u001b[38;5;241m=\u001b[39minput_columns,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 377\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs,\n\u001b[1;32m 378\u001b[0m )\n\u001b[0;32m--> 380\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[43mfut\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdone\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrefresh_status_secs\u001b[49m\u001b[43m)\u001b[49m:\n\u001b[1;32m 381\u001b[0m \u001b[38;5;66;03m# wait for the backfill to complete, updating statuses\u001b[39;00m\n\u001b[1;32m 382\u001b[0m cs\u001b[38;5;241m.\u001b[39mget_status()\n\u001b[1;32m 383\u001b[0m 
fut\u001b[38;5;241m.\u001b[39mstatus()\n", + "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/geneva/runners/ray/pipeline.py:801\u001b[0m, in \u001b[0;36mRayJobFuture.done\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m 799\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21mdone\u001b[39m(\u001b[38;5;28mself\u001b[39m, timeout: \u001b[38;5;28mfloat\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0.0\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28mbool\u001b[39m:\n\u001b[1;32m 800\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstatus()\n\u001b[0;32m--> 801\u001b[0m ready, _ \u001b[38;5;241m=\u001b[39m \u001b[43mray\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mwait\u001b[49m\u001b[43m(\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mray_obj_ref\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 802\u001b[0m done \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mbool\u001b[39m(ready)\n\u001b[1;32m 803\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m done:\n", + "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/ray/_private/auto_init_hook.py:22\u001b[0m, in \u001b[0;36mwrap_auto_init..auto_init_wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 19\u001b[0m \u001b[38;5;129m@wraps\u001b[39m(fn)\n\u001b[1;32m 20\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21mauto_init_wrapper\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 21\u001b[0m auto_init_ray()\n\u001b[0;32m---> 22\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/ray/_private/client_mode_hook.py:104\u001b[0m, in \u001b[0;36mclient_mode_hook..wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 102\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m func\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m \u001b[38;5;241m!=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124minit\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m is_client_mode_enabled_by_default:\n\u001b[1;32m 103\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mgetattr\u001b[39m(ray, func\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m)(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m--> 104\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/ray/_private/worker.py:3089\u001b[0m, in \u001b[0;36mwait\u001b[0;34m(ray_waitables, num_returns, timeout, fetch_local)\u001b[0m\n\u001b[1;32m 3087\u001b[0m timeout \u001b[38;5;241m=\u001b[39m timeout \u001b[38;5;28;01mif\u001b[39;00m timeout \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;241m10\u001b[39m\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m6\u001b[39m\n\u001b[1;32m 3088\u001b[0m timeout_milliseconds \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mint\u001b[39m(timeout \u001b[38;5;241m*\u001b[39m \u001b[38;5;241m1000\u001b[39m)\n\u001b[0;32m-> 3089\u001b[0m ready_ids, 
remaining_ids \u001b[38;5;241m=\u001b[39m \u001b[43mworker\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcore_worker\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mwait\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 3090\u001b[0m \u001b[43m \u001b[49m\u001b[43mray_waitables\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3091\u001b[0m \u001b[43m \u001b[49m\u001b[43mnum_returns\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3092\u001b[0m \u001b[43m \u001b[49m\u001b[43mtimeout_milliseconds\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3093\u001b[0m \u001b[43m \u001b[49m\u001b[43mfetch_local\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3094\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3095\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m ready_ids, remaining_ids\n", + "File \u001b[0;32mpython/ray/_raylet.pyx:3512\u001b[0m, in \u001b[0;36mray._raylet.CoreWorker.wait\u001b[0;34m()\u001b[0m\n", + "File \u001b[0;32mpython/ray/includes/common.pxi:83\u001b[0m, in \u001b[0;36mray._raylet.check_status\u001b[0;34m()\u001b[0m\n", + "\u001b[0;31mKeyboardInterrupt\u001b[0m: " + ] + } + ], + "source": [ + "table.backfill(\"summary\", batch_size=BATCH_SIZE, concurrency=CONCURRENCY, where=\"1=1\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "716bb917-a1aa-4868-8c98-fbdaca465e9a", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "table.search().limit(3).to_pandas()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0445b330-ab65-409b-8025-940aac400fd6", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import torchvision.transforms as transforms\n", + "import pyarrow as pa\n", + "import geneva as gv\n", + "import torch\n", + "from transformers import AutoTokenizer, AutoModel\n", + "\n", + "@gv.udf(data_type=pa.list_(pa.float32(), 512),\n", + " cuda=True\n", + " )\n", + "class EmbedImage:\n", + " def __init__(self):\n", + " self.ready = False\n", + "\n", + " def setup(self):\n", 
+ " self.model = CLIPModel.from_pretrained(\"openai/clip-vit-base-patch32\").cuda()\n", + " self.transform = transforms.Compose([\n", + " transforms.Resize((224, 224)),\n", + " transforms.ToTensor(),\n", + " transforms.Normalize(\n", + " (0.48145466, 0.4578275, 0.40821073),\n", + " (0.26862954, 0.26130258, 0.27577711)\n", + " )\n", + " ])\n", + " self.ready = True\n", + "\n", + "\n", + " def __call__(self, batch: pa.RecordBatch) -> pa.Array:\n", + " if not self.ready:\n", + " self.setup()\n", + " \n", + " img_bytes = batch.column(\"image_bytes\").to_pylist()\n", + " \n", + " def preprocess(b):\n", + " img = Image.open(io.BytesIO(b)).convert(\"RGB\")\n", + " return self.transform(img)\n", + "\n", + " with concurrent.futures.ThreadPoolExecutor(max_workers=50) as exe:\n", + " tensors = list(exe.map(preprocess, img_bytes))\n", + " \n", + " whole = torch.stack(tensors).cuda(non_blocking=True)\n", + "\n", + " with torch.no_grad(), torch.cuda.amp.autocast():\n", + " feats = self.model.get_image_features(pixel_values=whole)\n", + "\n", + " out = [v.cpu().tolist() for v in feats]\n", + " return pa.array(out, type=pa.list_(pa.float32(), 512))\n", + " \n", + "\n", + "\n", + "\n", + "@gv.udf(\n", + " data_type=pa.list_(pa.float32(), 768),\n", + " cuda=True\n", + ")\n", + "class EmbedText:\n", + " def __init__(self, column: str):\n", + " self.ready = False\n", + " self.column = column\n", + "\n", + " def setup(self):\n", + " self.tokenizer = AutoTokenizer.from_pretrained(\"BAAI/bge-base-en-v1.5\")\n", + " self.model = AutoModel.from_pretrained(\"BAAI/bge-base-en-v1.5\").cuda()\n", + " self.ready = True\n", + "\n", + " def __call__(self, batch: pa.RecordBatch) -> pa.Array:\n", + " if not self.ready:\n", + " self.setup()\n", + " texts = batch.column(self.column).to_pylist()\n", + " inputs = self.tokenizer(\n", + " texts,\n", + " return_tensors=\"pt\",\n", + " padding=True,\n", + " truncation=True,\n", + " max_length=256\n", + " )\n", + " inputs = {k: v.cuda() for k, v in 
inputs.items()}\n", + " with torch.no_grad():\n", + " out = self.model(**inputs)\n", + " pooled = out.last_hidden_state.mean(dim=1)\n", + " embeddings = pooled.cpu().tolist()\n", + " return pa.array(embeddings, type=pa.list_(pa.float32(), 768))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1ce41cc0-9d95-4173-97d1-2a29b40f075b", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "table.add_columns({\n", + " \"image_embedding\": EmbedImage(), \n", + " \"summary_embedding\": EmbedText(\"summary\"),\n", + " \"occasion_embedding\": EmbedText(\"occasion\")\n", + "})\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0dacc973-566f-4127-a3a7-a168d38bd38d", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "table.backfill(\"summary_embedding\", batch_size=BATCH_SIZE, where=\"1=1\", concurrency=CONCURRENCY )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f201d95e-23e5-4188-a6a3-c5c1ebdc4e4a", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "table.backfill(\"occasion_embedding\", batch_size=BATCH_SIZE, concurrency=CONCURRENCY, where=\"1=1\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ead7da89-5468-4b9e-89d9-4814a5928386", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "table.backfill(\"image_embedding\", batch_size=BATCH_SIZE, concurrency=CONCURRENCY, where=\"1=1\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e2f447ce-6424-479a-b4cd-be3b1a48c889", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "table.search().limit(4).to_pandas()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1cb807cd-32dd-4e9d-aca7-39828e412b0e", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "table.create_fts_index(\"summary\")\n", + "table.create_fts_index(\"occasion\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + 
"id": "db35d78b-356d-49b3-b36c-60c539b9048f", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "table.create_index(vector_column_name=\"summary_embedding\", num_sub_vectors=128)\n", + "table.create_index(vector_column_name=\"occasion_embedding\", num_sub_vectors=128)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "61673ab0-71af-444f-b732-5ad75d5b5ce6", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "t2 = time.time()\n", + "t = t2-t1\n", + "t" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dbc59315-3902-42de-b5c9-4d22b1d23150", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import os\n", + "import re\n", + "import io\n", + "import lancedb\n", + "import pyarrow as pa\n", + "from google import genai\n", + "from transformers import CLIPProcessor, CLIPModel\n", + "import torch\n", + "from typing import List, Optional\n", + "import matplotlib.pyplot as plt\n", + "from PIL import Image\n", + "from lancedb.rerankers import ColbertReranker\n", + "\n", + "\n", + "\n", + "genai_client = genai.Client(api_key=\"...\")\n", + "\n", + "# === CONNECT TO LANCEDB ===\n", + "db = lancedb.connect(DB_PATH)\n", + "tbl = db.open_table(\"products\")\n", + "\n", + "# === CLIP FOR QUERY EMBEDDING ===\n", + "clip = CLIPModel.from_pretrained(\"openai/clip-vit-base-patch32\").cuda()\n", + "proc = CLIPProcessor.from_pretrained(\"openai/clip-vit-base-patch32\")\n", + "\n", + "# === FEATURE DESCRIPTIONS FOR ROUTING ===\n", + "FEATURE_DESCRIPTIONS = {\n", + " \"summary_embedding\": \"semantic intent captured from product summary\",\n", + " \"occasion_embedding\": \"semantic intent captured from occasion description\"\n", + "}\n", + "VECTOR_FEATURES = set(FEATURE_DESCRIPTIONS.keys())\n", + "\n", + "# === QUERY ROUTER USING GEMINI ===\n", + "def choose_feature(query: str) -> str:\n", + " options = \"\".join(f\"- `{f}`: {d}\" for f, d in FEATURE_DESCRIPTIONS.items())\n", + " prompt = (\n", + " 
f\"Given the user query:{query}\"\n", + " f\"Which one of the following features best matches the query intent? \"\n", + " f\"Choose exactly one option (only return the feature name as plain string without formatting):{options}\"\n", + " )\n", + " resp = genai_client.models.generate_content(\n", + " model=\"gemini-2.5-flash-lite\",\n", + " contents=prompt,\n", + " config={\"temperature\": 0.0}\n", + " )\n", + " return resp.text.strip()\n", + "\n", + "# === COLOR EXTRACTION ===\n", + "COLOR_LIST = [\"black\",\"white\",\"red\",\"blue\",\"green\",\"yellow\",\"pink\",\"orange\",\"grey\",\"brown\"]\n", + "def extract_color(query: str) -> Optional[str]:\n", + " q = query.lower()\n", + " for c in COLOR_LIST:\n", + " if re.search(rf\"\b{c}\b\", q):\n", + " return c\n", + " return None\n", + "\n", + "# === QUERY EMBEDDING FUNCTION ===\n", + "def embed_query(query: str) -> List[float]:\n", + " inputs = proc(text=[query], return_tensors=\"pt\", padding=True)\n", + " inputs = {k: v.cuda() for k, v in inputs.items()}\n", + " with torch.no_grad():\n", + " emb = clip.get_text_features(**inputs)[0]\n", + " return emb.cpu().tolist()\n", + "\n", + "reranker_occasion = ColbertReranker(column=\"occasion\")\n", + "reranker_summary = ColbertReranker(column=\"summary\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0a934e63-4963-461e-97ad-6325049e9221", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "\n", + "# === MAIN SEARCH FUNCTION (Hybrid Only) ===\n", + "def search(query: str,\n", + " k: int = 5,\n", + " color_filter: Optional[str] = None) -> List[dict]:\n", + " # Route to feature\n", + " feature = choose_feature(query)\n", + " print(f\"[Router] Selected feature: {feature}\")\n", + "\n", + " # Extract color filter\n", + " color = extract_color(query)\n", + " if color:\n", + " print(f\"[Filter] Color detected: {color}\")\n", + "\n", + " # Always hybrid search on vector feature\n", + " search_input = embed_query(query)\n", + " reranker = 
reranker_summary if feature==\"summary_embedding\" else reranker_occasion\n", + " qb = tbl.search(query_type=\"hybrid\", vector_column_name=feature).vector(search_input).text(query).rerank(reranker)\n", + " \n", + " #qb = tbl.search(search_input, query_type=\"vector\", vector_column_name=feature)\n", + " if color:\n", + " print(\"filter :\", color)\n", + " qb = qb.where(f\"color_tags like '{color}'\", prefilter=True)\n", + "\n", + " qb = qb.limit(k*2) # overfetch and rerank\n", + " return qb.to_list()[:k]\n", + "\n", + "# === DISPLAY RESULTS FUNCTION ===\n", + "def display_results(results: List[dict], cols: int = 4):\n", + " n = len(results)\n", + " rows = (n + cols - 1) // cols\n", + " fig, axes = plt.subplots(rows, cols, figsize=(cols * 3, rows * 3))\n", + " axes = axes.flatten()\n", + " for ax in axes[n:]:\n", + " ax.axis('off')\n", + " for i, item in enumerate(results):\n", + " img = Image.open(io.BytesIO(item['image_bytes']))\n", + " axes[i].imshow(img)\n", + " axes[i].set_title(item.get('description','')[:15], fontsize=8)\n", + " axes[i].axis('off')\n", + " plt.tight_layout()\n", + " plt.show()\n", + "\n", + "# === EXAMPLE USAGE ===\n", + "if __name__ == \"__main__\":\n", + " queries = [\n", + " \"black jacket\",\n", + " \"shirt or t-shirt for business meeting\"\n", + " ]\n", + " for q in queries:\n", + " print(f\"Query: {q}\")\n", + " res = search(q, k=5, color_filter=None)\n", + " print([r[\"occasion\"] for r in res])\n", + " display_results(res, cols=3)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb375cb5-0151-4c74-9ead-c9755ac7629c", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f04d09e4-4474-4419-89cd-199b123d8ad9", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import os\n", + "\n", + "\n", + "import lancedb\n", + "from lancedb.pydantic import LanceModel, Vector\n", + "\n", + "\n", + "\n", + "class Documents(LanceModel):\n", + " 
class Documents(LanceModel):
    """Row schema for the hybrid-search smoke test: a 2-d vector plus text."""
    vec: Vector(2)
    text: str


data = [
    {"text": "rebel spaceships striking from a hidden base", "vec": [0, 0]},
    {"text": "have won their first victory against the evil Galactic Empire", "vec": [0, 0]},
    {"text": "during the battle rebel spies managed to steal secret plans", "vec": [0, 0]},
    # BUG FIX: this row used the key "vector", which does not match the
    # schema field name "vec", so the row did not conform to the schema.
    {"text": "to the Empire's ultimate weapon the Death Star", "vec": [0, 0]},
]
uri = "data/sample-lancedb"
db = lancedb.connect(uri)
table = db.create_table("df", schema=Documents, mode="overwrite")
# Ingest the rows; vectors are supplied explicitly in `data` (no
# embedding function is registered, so there is no auto-vectorization).
table.add(data)
# A full-text-search index on `text` is required before hybrid search.
table.create_fts_index("text")
# Hybrid search with the default re-ranker.
table.search(query_type="hybrid", vector_column_name="vec").text("text").vector([0, 0]).to_pandas()