diff --git a/docs/docs/integrations/providers/superlinked.mdx b/docs/docs/integrations/providers/superlinked.mdx
new file mode 100644
index 0000000000000..72c62f47b1ebf
--- /dev/null
+++ b/docs/docs/integrations/providers/superlinked.mdx
@@ -0,0 +1,140 @@
+---
+title: Superlinked
+description: LangChain integration package for the Superlinked retrieval stack
+---
+
+import Link from '@docusaurus/Link';
+
+### Overview
+
+Superlinked enables context-aware retrieval using multiple space types (text similarity, categorical, numerical, recency, and more). The `langchain-superlinked` package provides a LangChain-native `SuperlinkedRetriever` that plugs directly into your RAG chains.
+
+### Links
+
+- [Integration repository](https://github.com/superlinked/langchain-superlinked)
+- [Superlinked core repository](https://github.com/superlinked/superlinked)
+- [Article: Build RAG using LangChain & Superlinked](https://links.superlinked.com/langchain_article)
+
+### Install
+
+```bash
+pip install -U langchain-superlinked superlinked
+```
+
+### Quickstart
+
+```python
+import superlinked.framework as sl
+from langchain_superlinked import SuperlinkedRetriever
+
+# 1) Define schema
+class DocumentSchema(sl.Schema):
+ id: sl.IdField
+ content: sl.String
+
+doc_schema = DocumentSchema()
+
+# 2) Define space and index
+text_space = sl.TextSimilaritySpace(
+ text=doc_schema.content, model="sentence-transformers/all-MiniLM-L6-v2"
+)
+doc_index = sl.Index([text_space])
+
+# 3) Define query
+query = (
+ sl.Query(doc_index)
+ .find(doc_schema)
+ .similar(text_space.text, sl.Param("query_text"))
+ .select([doc_schema.content])
+ .limit(sl.Param("limit"))
+)
+
+# 4) Minimal app setup
+source = sl.InMemorySource(schema=doc_schema)
+executor = sl.InMemoryExecutor(sources=[source], indices=[doc_index])
+app = executor.run()
+source.put([
+ {"id": "1", "content": "Machine learning algorithms process data efficiently."},
+ {"id": "2", "content": "Natural language processing understands human language."},
+])
+
+# 5) LangChain retriever
+retriever = SuperlinkedRetriever(
+ sl_client=app, sl_query=query, page_content_field="content"
+)
+
+# Search
+docs = retriever.invoke("artificial intelligence", limit=2)
+for d in docs:
+ print(d.page_content)
+```
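+
+The quickstart above uses a single text space. Superlinked can also combine several spaces in one index and weight them per query; the sketch below illustrates the pattern (the `category` field, the category list, and the weight values are illustrative):
+
+```python
+import superlinked.framework as sl
+
+class ArticleSchema(sl.Schema):
+    id: sl.IdField
+    content: sl.String
+    category: sl.String
+
+article = ArticleSchema()
+
+content_space = sl.TextSimilaritySpace(
+    text=article.content, model="sentence-transformers/all-MiniLM-L6-v2"
+)
+category_space = sl.CategoricalSimilaritySpace(
+    category_input=article.category,
+    categories=["technology", "science", "business"],
+)
+article_index = sl.Index([content_space, category_space])
+
+weighted_query = (
+    sl.Query(
+        article_index,
+        weights={
+            content_space: sl.Param("content_weight"),
+            category_space: sl.Param("category_weight"),
+        },
+    )
+    .find(article)
+    .similar(content_space.text, sl.Param("query_text"))
+    .select([article.content, article.category])
+    .limit(sl.Param("limit"))
+)
+
+# Space weights are passed through the retriever at query time, e.g.:
+# retriever.invoke("ai news", content_weight=1.0, category_weight=0.3, limit=3)
+```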
+
+### What the retriever expects (App and Query)
+
+The retriever takes two core inputs:
+
+- `sl_client`: a Superlinked App created by running an executor (e.g., `InMemoryExecutor(...).run()`)
+- `sl_query`: a `QueryDescriptor` returned by chaining `sl.Query(...).find(...).similar(...).select(...).limit(...)`
+
+Minimal setup:
+
+```python
+import superlinked.framework as sl
+from langchain_superlinked import SuperlinkedRetriever
+
+class Doc(sl.Schema):
+ id: sl.IdField
+ content: sl.String
+
+doc = Doc()
+space = sl.TextSimilaritySpace(text=doc.content, model="sentence-transformers/all-MiniLM-L6-v2")
+index = sl.Index([space])
+
+query = (
+ sl.Query(index)
+ .find(doc)
+ .similar(space.text, sl.Param("query_text"))
+ .select([doc.content])
+ .limit(sl.Param("limit"))
+)
+
+source = sl.InMemorySource(schema=doc)
+app = sl.InMemoryExecutor(sources=[source], indices=[index]).run()
+
+retriever = SuperlinkedRetriever(sl_client=app, sl_query=query, page_content_field="content")
+```
+
+Note: For a persistent vector DB, pass `vector_database=...` to the executor (e.g., Qdrant) before `.run()`.
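+
+A sketch of such a setup, reusing `source`, `index`, and `query` from the minimal setup above (the Qdrant URL and API key are placeholders):
+
+```python
+qdrant = sl.QdrantVectorDatabase(
+    url="https://your-qdrant-cluster.qdrant.io",  # placeholder
+    api_key="your-api-key",  # placeholder
+)
+
+app = sl.InMemoryExecutor(
+    sources=[source], indices=[index], vector_database=qdrant
+).run()
+
+retriever = SuperlinkedRetriever(
+    sl_client=app, sl_query=query, page_content_field="content"
+)
+```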
+
+### Use within a chain
+
+```python
+from langchain_core.runnables import RunnablePassthrough
+from langchain_core.prompts import ChatPromptTemplate
+from langchain_openai import ChatOpenAI
+
+def format_docs(docs):
+ return "\n\n".join(doc.page_content for doc in docs)
+
+prompt = ChatPromptTemplate.from_template(
+    "Answer based on context:\n\nContext: {context}\nQuestion: {question}"
+)
+
+chain = ({"context": retriever | format_docs, "question": RunnablePassthrough()}
+ | prompt
+ | ChatOpenAI())
+
+answer = chain.invoke("How does machine learning work?")
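+
+# The chain ends with a chat model, so `answer` is an AIMessage; print its text:
+print(answer.content)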
+```
+
+### Resources
+
+- [PyPI: langchain-superlinked](https://pypi.org/project/langchain-superlinked/)
+- [PyPI: superlinked](https://pypi.org/project/superlinked/)
+- [Source repository](https://github.com/superlinked/langchain-superlinked)
+- [Superlinked core repository](https://github.com/superlinked/superlinked)
+- [Build RAG using LangChain & Superlinked (article)](https://links.superlinked.com/langchain_article)
+
+
diff --git a/docs/docs/integrations/retrievers/superlinked.ipynb b/docs/docs/integrations/retrievers/superlinked.ipynb
new file mode 100644
index 0000000000000..c6fb15c819abf
--- /dev/null
+++ b/docs/docs/integrations/retrievers/superlinked.ipynb
@@ -0,0 +1,1292 @@
+{
+ "cells": [
+ {
+ "cell_type": "raw",
+ "id": "7fb27b941602401d91542211134fc71a",
+ "metadata": {
+ "vscode": {
+ "languageId": "raw"
+ }
+ },
+ "source": [
+ "---\n",
+ "sidebar_label: SuperlinkedRetriever\n",
+ "---\n"
+ ]
+ },
+ {
+   "cell_type": "markdown",
+   "id": "8ce296f3",
+   "metadata": {},
+ "source": [
+ "# SuperlinkedRetriever\n",
+ "\n",
+ "> [Superlinked](https://github.com/superlinked/superlinked) is a library for building context-aware vector search applications. It provides multi-modal vector spaces that can handle text similarity, categorical similarity, recency, and numerical values with flexible weighting strategies.\n",
+ "\n",
+ "This will help you get started with the SuperlinkedRetriever [retriever](/docs/concepts/retrievers/). For detailed documentation of all SuperlinkedRetriever features and configurations head to the [API reference](https://python.langchain.com/api_reference/superlinked/retrievers/langchain_superlinked.retrievers.SuperlinkedRetriever.html).\n",
+ "\n",
+ "### Further reading\n",
+ "\n",
+ "- External article: [Build RAG using LangChain & Superlinked](https://links.superlinked.com/langchain_article)\n",
+ "- Integration repo: [superlinked/langchain-superlinked](https://github.com/superlinked/langchain-superlinked)\n",
+ "- Superlinked core repo: [superlinked/superlinked](https://links.superlinked.com/langchain_repo_sl)\n",
+ "\n",
+ "### Integration details\n",
+ "\n",
+ "| Retriever | Source | Package |\n",
+ "| :--- | :--- | :---: |\n",
+    "| [SuperlinkedRetriever](https://python.langchain.com/api_reference/superlinked/retrievers/langchain_superlinked.retrievers.SuperlinkedRetriever.html) | Multi-modal vector search | langchain-superlinked |\n",
+ "\n",
+ "## Setup\n",
+ "\n",
+ "The SuperlinkedRetriever requires the `langchain-superlinked` package and its peer dependency `superlinked`. You can install these with:\n",
+ "\n",
+ "```bash\n",
+ "pip install -U langchain-superlinked superlinked\n",
+ "```\n",
+ "\n",
+ "No API keys are required for basic usage as Superlinked can run in-memory or with local vector databases.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "efd00169",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Optional: Set up for vector database usage\n",
+ "# import os\n",
+ "# os.environ[\"QDRANT_API_KEY\"] = \"your-api-key\" # For Qdrant\n",
+ "# No setup required for in-memory usage\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "aff64151",
+ "metadata": {},
+ "source": [
+ "### App and Query: what the retriever needs\n",
+ "\n",
+ "The retriever requires:\n",
+ "\n",
+ "- `sl_client`: a Superlinked App created by an executor's `.run()`\n",
+ "- `sl_query`: a `QueryDescriptor` built via `sl.Query(...).find(...).similar(...).select(...).limit(...)`\n",
+ "\n",
+ "Minimal example:\n",
+ "\n",
+ "```python\n",
+ "import superlinked.framework as sl\n",
+ "from langchain_superlinked import SuperlinkedRetriever\n",
+ "\n",
+ "class Doc(sl.Schema):\n",
+ " id: sl.IdField\n",
+ " content: sl.String\n",
+ "\n",
+ "doc = Doc()\n",
+ "space = sl.TextSimilaritySpace(text=doc.content, model=\"sentence-transformers/all-MiniLM-L6-v2\")\n",
+ "index = sl.Index([space])\n",
+ "\n",
+ "query = (\n",
+ " sl.Query(index)\n",
+ " .find(doc)\n",
+ " .similar(space.text, sl.Param(\"query_text\"))\n",
+ " .select([doc.content])\n",
+ " .limit(sl.Param(\"limit\"))\n",
+ ")\n",
+ "\n",
+ "source = sl.InMemorySource(schema=doc)\n",
+ "app = sl.InMemoryExecutor(sources=[source], indices=[index]).run()\n",
+ "\n",
+ "retriever = SuperlinkedRetriever(sl_client=app, sl_query=query, page_content_field=\"content\")\n",
+ "```\n",
+ "\n",
+ "For a production setup, create the executor with a vector DB (e.g., Qdrant) and pass it as `vector_database=...` before calling `.run()`.\n"
+ ]
+ },
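+  {
+   "cell_type": "markdown",
+   "id": "qdrant-production-sketch",
+   "metadata": {},
+   "source": [
+    "A minimal sketch of that production setup, reusing `source`, `index`, and `query` from the minimal example above (the URL and API key are placeholders):\n",
+    "\n",
+    "```python\n",
+    "vector_db = sl.QdrantVectorDatabase(\n",
+    "    url=\"https://your-qdrant-cluster.qdrant.io\",  # placeholder\n",
+    "    api_key=\"your-api-key\",  # placeholder\n",
+    ")\n",
+    "\n",
+    "app = sl.InMemoryExecutor(\n",
+    "    sources=[source], indices=[index], vector_database=vector_db\n",
+    ").run()\n",
+    "```\n"
+   ]
+  },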
+ {
+   "cell_type": "markdown",
+   "id": "acae54e37e7d407bbb7b55eff062a284",
+   "metadata": {},
+ "source": [
+ "## Instantiation\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "346a75b0",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import superlinked.framework as sl\n",
+ "from langchain_superlinked import SuperlinkedRetriever\n",
+ "\n",
+ "\n",
+ "# 1. Define Schema\n",
+ "class DocumentSchema(sl.Schema):\n",
+ " id: sl.IdField\n",
+ " content: sl.String\n",
+ "\n",
+ "\n",
+ "doc_schema = DocumentSchema()\n",
+ "\n",
+ "# 2. Define Space and Index\n",
+ "text_space = sl.TextSimilaritySpace(\n",
+ " text=doc_schema.content, model=\"sentence-transformers/all-MiniLM-L6-v2\"\n",
+ ")\n",
+ "doc_index = sl.Index([text_space])\n",
+ "\n",
+ "# 3. Define Query\n",
+ "query = (\n",
+ " sl.Query(doc_index)\n",
+ " .find(doc_schema)\n",
+ " .similar(text_space.text, sl.Param(\"query_text\"))\n",
+ " .select([doc_schema.content])\n",
+ " .limit(sl.Param(\"limit\"))\n",
+ ")\n",
+ "\n",
+ "# 4. Set up data and app\n",
+ "documents = [\n",
+ " {\n",
+ " \"id\": \"doc1\",\n",
+ " \"content\": \"Machine learning algorithms can process large datasets efficiently.\",\n",
+ " },\n",
+ " {\n",
+ " \"id\": \"doc2\",\n",
+ " \"content\": \"Natural language processing enables computers to understand human language.\",\n",
+ " },\n",
+ " {\n",
+ " \"id\": \"doc3\",\n",
+ " \"content\": \"Deep learning models require significant computational resources.\",\n",
+ " },\n",
+ " {\n",
+ " \"id\": \"doc4\",\n",
+ " \"content\": \"Artificial intelligence is transforming various industries.\",\n",
+ " },\n",
+ " {\n",
+ " \"id\": \"doc5\",\n",
+ " \"content\": \"Neural networks are inspired by biological brain structures.\",\n",
+ " },\n",
+ "]\n",
+ "\n",
+ "source = sl.InMemorySource(schema=doc_schema)\n",
+ "executor = sl.InMemoryExecutor(sources=[source], indices=[doc_index])\n",
+ "app = executor.run()\n",
+ "source.put(documents)\n",
+ "\n",
+ "# 5. Create Retriever\n",
+ "retriever = SuperlinkedRetriever(\n",
+ " sl_client=app, sl_query=query, page_content_field=\"content\", k=3\n",
+ ")"
+ ]
+ },
+ {
+   "cell_type": "markdown",
+   "id": "9a63283cbaf04dbcab1f6479b197f3a8",
+   "metadata": {},
+ "source": [
+ "## Usage\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "fcc79d71",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Basic usage\n",
+ "results = retriever.invoke(\"artificial intelligence and machine learning\", limit=2)\n",
+ "for i, doc in enumerate(results, 1):\n",
+ " print(f\"Document {i}:\")\n",
+ " print(f\"Content: {doc.page_content}\")\n",
+ " print(f\"Metadata: {doc.metadata}\")\n",
+ " print(\"---\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4afdd5ff",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Override k parameter at query time\n",
+ "more_results = retriever.invoke(\"neural networks and deep learning\", k=4)\n",
+ "print(f\"Retrieved {len(more_results)} documents:\")\n",
+ "for i, doc in enumerate(more_results, 1):\n",
+ " print(f\"{i}. {doc.page_content[:50]}...\")"
+ ]
+ },
+ {
+   "cell_type": "markdown",
+   "id": "8dd0d8092fe74a7c96281538738b07e2",
+   "metadata": {},
+ "source": [
+ "## Use within a chain\n",
+ "\n",
+ "Like other retrievers, SuperlinkedRetriever can be incorporated into LLM applications via [chains](/docs/how_to/sequence/).\n",
+ "\n",
+    "We will need an LLM or chat model:\n",
+ "\n",
+    "import ChatModelTabs from \"@theme/ChatModelTabs\";\n",
+    "\n",
+    "<ChatModelTabs customVarName=\"llm\" />\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a42e63cf",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# pip install -qU langchain-openai\n",
+ "import getpass\n",
+ "import os\n",
+ "\n",
+ "if not os.environ.get(\"OPENAI_API_KEY\"):\n",
+ " os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"Enter your OpenAI API key: \")\n",
+ "\n",
+ "from langchain_openai import ChatOpenAI\n",
+ "\n",
+ "llm = ChatOpenAI(model=\"gpt-4o-mini\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "0158acc1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from langchain import hub\n",
+ "from langchain_core.output_parsers import StrOutputParser\n",
+ "from langchain_core.runnables import RunnablePassthrough\n",
+ "\n",
+ "prompt = hub.pull(\"rlm/rag-prompt\")\n",
+ "\n",
+ "\n",
+ "def format_docs(docs):\n",
+ " return \"\\n\\n\".join(doc.page_content for doc in docs)\n",
+ "\n",
+ "\n",
+ "rag_chain = (\n",
+ " {\"context\": retriever | format_docs, \"question\": RunnablePassthrough()}\n",
+ " | prompt\n",
+ " | llm\n",
+ " | StrOutputParser()\n",
+ ")\n",
+ "\n",
+ "rag_chain.invoke(\"What is machine learning and how does it work?\")"
+ ]
+ },
+ {
+   "cell_type": "markdown",
+   "id": "72eea5119410473aa328ad9291626812",
+   "metadata": {},
+ "source": [
+ "## API reference\n",
+ "\n",
+ "For detailed documentation of all SuperlinkedRetriever features and configurations, head to the [API reference](https://python.langchain.com/api_reference/superlinked/retrievers/langchain_superlinked.retrievers.SuperlinkedRetriever.html).\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f76bb133",
+ "metadata": {},
+ "source": [
+    "## SuperlinkedRetriever usage examples\n",
+    "\n",
+    "The following cell demonstrates how to use the SuperlinkedRetriever with different\n",
+    "space configurations to showcase its flexibility across various use cases.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "31a9d4f5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+    "from datetime import datetime, timedelta\n",
+    "\n",
+    "import superlinked.framework as sl\n",
+    "\n",
+    "from langchain_superlinked import SuperlinkedRetriever"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4a608192",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def example_1_simple_text_search():\n",
+ " \"\"\"\n",
+ " Example 1: Simple text-based semantic search\n",
+ " Use case: Basic document retrieval based on content similarity\n",
+ " \"\"\"\n",
+ " print(\"=== Example 1: Simple Text Search ===\")\n",
+ "\n",
+ " # 1. Define Schema\n",
+ " class DocumentSchema(sl.Schema):\n",
+ " id: sl.IdField\n",
+ " content: sl.String\n",
+ "\n",
+ " doc_schema = DocumentSchema()\n",
+ "\n",
+ " # 2. Define Space and Index\n",
+ " text_space = sl.TextSimilaritySpace(\n",
+ " text=doc_schema.content, model=\"sentence-transformers/all-MiniLM-L6-v2\"\n",
+ " )\n",
+ "\n",
+ " doc_index = sl.Index([text_space])\n",
+ "\n",
+ " # 3. Define Query\n",
+ " query = (\n",
+ " sl.Query(doc_index)\n",
+ " .find(doc_schema)\n",
+ " .similar(text_space.text, sl.Param(\"query_text\"))\n",
+ " .select([doc_schema.content])\n",
+ " .limit(sl.Param(\"limit\"))\n",
+ " )\n",
+ "\n",
+ " # 4. Set up data and app using executor pattern\n",
+ " documents = [\n",
+ " {\n",
+ " \"id\": \"doc1\",\n",
+ " \"content\": \"Machine learning algorithms can process large datasets efficiently.\",\n",
+ " },\n",
+ " {\n",
+ " \"id\": \"doc2\",\n",
+ " \"content\": \"Natural language processing enables computers to understand human language.\",\n",
+ " },\n",
+ " {\n",
+ " \"id\": \"doc3\",\n",
+ " \"content\": \"Deep learning models require significant computational resources.\",\n",
+ " },\n",
+ " {\n",
+ " \"id\": \"doc4\",\n",
+ " \"content\": \"Data science combines statistics, programming, and domain expertise.\",\n",
+ " },\n",
+ " {\n",
+ " \"id\": \"doc5\",\n",
+ " \"content\": \"Artificial intelligence is transforming various industries.\",\n",
+ " },\n",
+ " ]\n",
+ "\n",
+ " # Create source and executor\n",
+ " source = sl.InMemorySource(schema=doc_schema)\n",
+ " executor = sl.InMemoryExecutor(sources=[source], indices=[doc_index])\n",
+ " app = executor.run()\n",
+ "\n",
+ " # Add data to the source after the app is running\n",
+ " source.put(documents)\n",
+ "\n",
+ " # 5. Create Retriever\n",
+ " retriever = SuperlinkedRetriever(\n",
+ " sl_client=app, sl_query=query, page_content_field=\"content\"\n",
+ " )\n",
+ "\n",
+ " # 6. Use the retriever\n",
+ " results = retriever.invoke(\"artificial intelligence and machine learning\", limit=3)\n",
+ "\n",
+    "    print(\"Query: 'artificial intelligence and machine learning'\")\n",
+ " print(f\"Found {len(results)} documents:\")\n",
+ " for i, doc in enumerate(results, 1):\n",
+ " print(f\" {i}. {doc.page_content}\")\n",
+ " print()\n",
+ "\n",
+ "\n",
+ "def example_2_multi_space_blog_search():\n",
+ " \"\"\"\n",
+ " Example 2: Multi-space blog post search\n",
+ " Use case: Blog search with content, category, and recency\n",
+ " \"\"\"\n",
+ " print(\"=== Example 2: Multi-Space Blog Search ===\")\n",
+ "\n",
+ " # 1. Define Schema\n",
+ " class BlogPostSchema(sl.Schema):\n",
+ " id: sl.IdField\n",
+ " title: sl.String\n",
+ " content: sl.String\n",
+ " category: sl.String\n",
+ " published_date: sl.Timestamp\n",
+ " view_count: sl.Integer\n",
+ "\n",
+ " blog_schema = BlogPostSchema()\n",
+ "\n",
+ " # 2. Define Multiple Spaces\n",
+ " # Text similarity for content\n",
+ " content_space = sl.TextSimilaritySpace(\n",
+ " text=blog_schema.content, model=\"sentence-transformers/all-MiniLM-L6-v2\"\n",
+ " )\n",
+ "\n",
+ " # Title similarity\n",
+ " title_space = sl.TextSimilaritySpace(\n",
+ " text=blog_schema.title, model=\"sentence-transformers/all-MiniLM-L6-v2\"\n",
+ " )\n",
+ "\n",
+ " # Category similarity\n",
+ " category_space = sl.CategoricalSimilaritySpace(\n",
+ " category_input=blog_schema.category,\n",
+ " categories=[\"technology\", \"science\", \"business\", \"health\", \"travel\"],\n",
+ " )\n",
+ "\n",
+ " # Recency (favor recent posts)\n",
+ " recency_space = sl.RecencySpace(\n",
+ " timestamp=blog_schema.published_date,\n",
+ " period_time_list=[\n",
+ " sl.PeriodTime(timedelta(days=30)), # Last month\n",
+ " sl.PeriodTime(timedelta(days=90)), # Last 3 months\n",
+ " sl.PeriodTime(timedelta(days=365)), # Last year\n",
+ " ],\n",
+ " )\n",
+ "\n",
+ " # Popularity (based on view count)\n",
+ " popularity_space = sl.NumberSpace(\n",
+ " number=blog_schema.view_count,\n",
+ " min_value=0,\n",
+ " max_value=10000,\n",
+ " mode=sl.Mode.MAXIMUM,\n",
+ " )\n",
+ "\n",
+ " # 3. Create Index\n",
+ " blog_index = sl.Index(\n",
+ " [content_space, title_space, category_space, recency_space, popularity_space]\n",
+ " )\n",
+ "\n",
+ " # 4. Define Query with multiple weighted spaces\n",
+ " blog_query = (\n",
+ " sl.Query(\n",
+ " blog_index,\n",
+ " weights={\n",
+ " content_space: sl.Param(\"content_weight\"),\n",
+ " title_space: sl.Param(\"title_weight\"),\n",
+ " category_space: sl.Param(\"category_weight\"),\n",
+ " recency_space: sl.Param(\"recency_weight\"),\n",
+ " popularity_space: sl.Param(\"popularity_weight\"),\n",
+ " },\n",
+ " )\n",
+ " .find(blog_schema)\n",
+ " .similar(content_space.text, sl.Param(\"query_text\"))\n",
+ " .select(\n",
+ " [\n",
+ " blog_schema.title,\n",
+ " blog_schema.content,\n",
+ " blog_schema.category,\n",
+ " blog_schema.published_date,\n",
+ " blog_schema.view_count,\n",
+ " ]\n",
+ " )\n",
+ " .limit(sl.Param(\"limit\"))\n",
+ " )\n",
+ "\n",
+ " # 5. Sample blog data\n",
+ " from datetime import datetime\n",
+ "\n",
+ " # Convert datetime objects to unix timestamps (integers) as required by Timestamp schema field\n",
+ " blog_posts = [\n",
+ " {\n",
+ " \"id\": \"post1\",\n",
+ " \"title\": \"Introduction to Machine Learning\",\n",
+ " \"content\": \"Machine learning is revolutionizing how we process data and make predictions.\",\n",
+ " \"category\": \"technology\",\n",
+ " \"published_date\": int((datetime.now() - timedelta(days=5)).timestamp()),\n",
+ " \"view_count\": 1500,\n",
+ " },\n",
+ " {\n",
+ " \"id\": \"post2\",\n",
+ " \"title\": \"The Future of AI in Healthcare\",\n",
+ " \"content\": \"Artificial intelligence is transforming medical diagnosis and treatment.\",\n",
+ " \"category\": \"health\",\n",
+ " \"published_date\": int((datetime.now() - timedelta(days=15)).timestamp()),\n",
+ " \"view_count\": 2300,\n",
+ " },\n",
+ " {\n",
+ " \"id\": \"post3\",\n",
+ " \"title\": \"Business Analytics with Python\",\n",
+ " \"content\": \"Learn how to use Python for business data analysis and visualization.\",\n",
+ " \"category\": \"business\",\n",
+ " \"published_date\": int((datetime.now() - timedelta(days=45)).timestamp()),\n",
+ " \"view_count\": 980,\n",
+ " },\n",
+ " {\n",
+ " \"id\": \"post4\",\n",
+ " \"title\": \"Deep Learning Neural Networks\",\n",
+ " \"content\": \"Understanding neural networks and their applications in modern AI.\",\n",
+ " \"category\": \"technology\",\n",
+ " \"published_date\": int((datetime.now() - timedelta(days=2)).timestamp()),\n",
+ " \"view_count\": 3200,\n",
+ " },\n",
+ " ]\n",
+ "\n",
+ " # Create source and executor\n",
+ " source = sl.InMemorySource(schema=blog_schema)\n",
+ " executor = sl.InMemoryExecutor(sources=[source], indices=[blog_index])\n",
+ " app = executor.run()\n",
+ "\n",
+ " # Add data to the source after the app is running\n",
+ " source.put(blog_posts)\n",
+ "\n",
+ " # 6. Create Retriever\n",
+ " retriever = SuperlinkedRetriever(\n",
+ " sl_client=app,\n",
+ " sl_query=blog_query,\n",
+ " page_content_field=\"content\",\n",
+ " metadata_fields=[\"title\", \"category\", \"published_date\", \"view_count\"],\n",
+ " )\n",
+ "\n",
+ " # 7. Demonstrate different weighting strategies\n",
+ " scenarios = [\n",
+ " {\n",
+ " \"name\": \"Content-focused search\",\n",
+ " \"params\": {\n",
+ " \"content_weight\": 1.0,\n",
+ " \"title_weight\": 0.3,\n",
+ " \"category_weight\": 0.1,\n",
+ " \"recency_weight\": 0.2,\n",
+ " \"popularity_weight\": 0.1,\n",
+ " \"limit\": 3,\n",
+ " },\n",
+ " },\n",
+ " {\n",
+ " \"name\": \"Recent posts prioritized\",\n",
+ " \"params\": {\n",
+ " \"content_weight\": 0.5,\n",
+ " \"title_weight\": 0.2,\n",
+ " \"category_weight\": 0.1,\n",
+ " \"recency_weight\": 1.0,\n",
+ " \"popularity_weight\": 0.1,\n",
+ " \"limit\": 3,\n",
+ " },\n",
+ " },\n",
+ " {\n",
+ " \"name\": \"Popular posts with category emphasis\",\n",
+ " \"params\": {\n",
+ " \"content_weight\": 0.6,\n",
+ " \"title_weight\": 0.3,\n",
+ " \"category_weight\": 0.8,\n",
+ " \"recency_weight\": 0.3,\n",
+ " \"popularity_weight\": 0.9,\n",
+ " \"limit\": 3,\n",
+ " },\n",
+ " },\n",
+ " ]\n",
+ "\n",
+ " query_text = \"machine learning and AI applications\"\n",
+ "\n",
+ " for scenario in scenarios:\n",
+ " print(f\"\\n--- {scenario['name']} ---\")\n",
+ " print(f\"Query: '{query_text}'\")\n",
+ "\n",
+ " results = retriever.invoke(query_text, **scenario[\"params\"])\n",
+ "\n",
+ " for i, doc in enumerate(results, 1):\n",
+ " print(\n",
+ " f\" {i}. {doc.metadata['title']} (Category: {doc.metadata['category']}, Views: {doc.metadata['view_count']})\"\n",
+ " )\n",
+ "\n",
+ " print()\n",
+ "\n",
+ "\n",
+ "def example_3_ecommerce_product_search():\n",
+ " \"\"\"\n",
+ " Example 3: E-commerce product search\n",
+ " Use case: Product search with price range, brand preference, and ratings\n",
+ " \"\"\"\n",
+ " print(\"=== Example 3: E-commerce Product Search ===\")\n",
+ "\n",
+ " # 1. Define Schema\n",
+ " class ProductSchema(sl.Schema):\n",
+ " id: sl.IdField\n",
+ " name: sl.String\n",
+ " description: sl.String\n",
+ " brand: sl.String\n",
+ " price: sl.Float\n",
+ " rating: sl.Float\n",
+ " category: sl.String\n",
+ "\n",
+ " product_schema = ProductSchema()\n",
+ "\n",
+ " # 2. Define Spaces\n",
+ " description_space = sl.TextSimilaritySpace(\n",
+ " text=product_schema.description, model=\"sentence-transformers/all-MiniLM-L6-v2\"\n",
+ " )\n",
+ "\n",
+ " name_space = sl.TextSimilaritySpace(\n",
+ " text=product_schema.name, model=\"sentence-transformers/all-MiniLM-L6-v2\"\n",
+ " )\n",
+ "\n",
+ " brand_space = sl.CategoricalSimilaritySpace(\n",
+ " category_input=product_schema.brand,\n",
+ " categories=[\"Apple\", \"Samsung\", \"Sony\", \"Nike\", \"Adidas\", \"Canon\"],\n",
+ " )\n",
+ "\n",
+ " category_space = sl.CategoricalSimilaritySpace(\n",
+ " category_input=product_schema.category,\n",
+ " categories=[\"electronics\", \"clothing\", \"sports\", \"photography\"],\n",
+ " )\n",
+ "\n",
+ " # Price space (lower prices get higher scores in MINIMUM mode)\n",
+ " price_space = sl.NumberSpace(\n",
+ " number=product_schema.price,\n",
+ " min_value=10.0,\n",
+ " max_value=2000.0,\n",
+ " mode=sl.Mode.MINIMUM, # Favor lower prices\n",
+ " )\n",
+ "\n",
+ " # Rating space (higher ratings get higher scores)\n",
+ " rating_space = sl.NumberSpace(\n",
+ " number=product_schema.rating,\n",
+ " min_value=1.0,\n",
+ " max_value=5.0,\n",
+ " mode=sl.Mode.MAXIMUM, # Favor higher ratings\n",
+ " )\n",
+ "\n",
+ " # 3. Create Index\n",
+ " product_index = sl.Index(\n",
+ " [\n",
+ " description_space,\n",
+ " name_space,\n",
+ " brand_space,\n",
+ " category_space,\n",
+ " price_space,\n",
+ " rating_space,\n",
+ " ]\n",
+ " )\n",
+ "\n",
+ " # 4. Define Query\n",
+ " product_query = (\n",
+ " sl.Query(\n",
+ " product_index,\n",
+ " weights={\n",
+ " description_space: sl.Param(\"description_weight\"),\n",
+ " name_space: sl.Param(\"name_weight\"),\n",
+ " brand_space: sl.Param(\"brand_weight\"),\n",
+ " category_space: sl.Param(\"category_weight\"),\n",
+ " price_space: sl.Param(\"price_weight\"),\n",
+ " rating_space: sl.Param(\"rating_weight\"),\n",
+ " },\n",
+ " )\n",
+ " .find(product_schema)\n",
+ " .similar(description_space.text, sl.Param(\"query_text\"))\n",
+ " .select(\n",
+ " [\n",
+ " product_schema.name,\n",
+ " product_schema.description,\n",
+ " product_schema.brand,\n",
+ " product_schema.price,\n",
+ " product_schema.rating,\n",
+ " product_schema.category,\n",
+ " ]\n",
+ " )\n",
+ " .limit(sl.Param(\"limit\"))\n",
+ " )\n",
+ "\n",
+ " # 5. Sample product data\n",
+ " products = [\n",
+ " {\n",
+ " \"id\": \"prod1\",\n",
+ " \"name\": \"Wireless Bluetooth Headphones\",\n",
+ " \"description\": \"High-quality wireless headphones with noise cancellation and long battery life.\",\n",
+ " \"brand\": \"Sony\",\n",
+ " \"price\": 299.99,\n",
+ " \"rating\": 4.5,\n",
+ " \"category\": \"electronics\",\n",
+ " },\n",
+ " {\n",
+ " \"id\": \"prod2\",\n",
+ " \"name\": \"Professional DSLR Camera\",\n",
+ " \"description\": \"Full-frame DSLR camera perfect for professional photography and videography.\",\n",
+ " \"brand\": \"Canon\",\n",
+ " \"price\": 1299.99,\n",
+ " \"rating\": 4.8,\n",
+ " \"category\": \"photography\",\n",
+ " },\n",
+ " {\n",
+ " \"id\": \"prod3\",\n",
+ " \"name\": \"Running Shoes\",\n",
+ " \"description\": \"Comfortable running shoes with excellent cushioning and support for athletes.\",\n",
+ " \"brand\": \"Nike\",\n",
+ " \"price\": 129.99,\n",
+ " \"rating\": 4.3,\n",
+ " \"category\": \"sports\",\n",
+ " },\n",
+ " {\n",
+ " \"id\": \"prod4\",\n",
+ " \"name\": \"Smartphone with 5G\",\n",
+ " \"description\": \"Latest smartphone with 5G connectivity, advanced camera, and all-day battery.\",\n",
+ " \"brand\": \"Samsung\",\n",
+ " \"price\": 899.99,\n",
+ " \"rating\": 4.6,\n",
+ " \"category\": \"electronics\",\n",
+ " },\n",
+ " {\n",
+ " \"id\": \"prod5\",\n",
+ " \"name\": \"Bluetooth Speaker\",\n",
+ " \"description\": \"Portable Bluetooth speaker with waterproof design and rich sound quality.\",\n",
+ " \"brand\": \"Sony\",\n",
+ " \"price\": 79.99,\n",
+ " \"rating\": 4.2,\n",
+ " \"category\": \"electronics\",\n",
+ " },\n",
+ " ]\n",
+ "\n",
+ " # Create source and executor\n",
+ " source = sl.InMemorySource(schema=product_schema)\n",
+ " executor = sl.InMemoryExecutor(sources=[source], indices=[product_index])\n",
+ " app = executor.run()\n",
+ "\n",
+ " # Add data to the source after the app is running\n",
+ " source.put(products)\n",
+ "\n",
+ " # 6. Create Retriever\n",
+ " retriever = SuperlinkedRetriever(\n",
+ " sl_client=app,\n",
+ " sl_query=product_query,\n",
+ " page_content_field=\"description\",\n",
+ " metadata_fields=[\"name\", \"brand\", \"price\", \"rating\", \"category\"],\n",
+ " )\n",
+ "\n",
+ " # 7. Demonstrate different search strategies\n",
+ " scenarios = [\n",
+ " {\n",
+ " \"name\": \"Quality-focused search (high ratings matter most)\",\n",
+ " \"query\": \"wireless audio device\",\n",
+ " \"params\": {\n",
+ " \"description_weight\": 0.7,\n",
+ " \"name_weight\": 0.5,\n",
+ " \"brand_weight\": 0.2,\n",
+ " \"category_weight\": 0.3,\n",
+ " \"price_weight\": 0.1,\n",
+ " \"rating_weight\": 1.0, # Prioritize high ratings\n",
+ " \"limit\": 3,\n",
+ " },\n",
+ " },\n",
+ " {\n",
+ " \"name\": \"Budget-conscious search (price matters most)\",\n",
+ " \"query\": \"electronics device\",\n",
+ " \"params\": {\n",
+ " \"description_weight\": 0.6,\n",
+ " \"name_weight\": 0.4,\n",
+ " \"brand_weight\": 0.1,\n",
+ " \"category_weight\": 0.2,\n",
+ " \"price_weight\": 1.0, # Prioritize lower prices\n",
+ " \"rating_weight\": 0.3,\n",
+ " \"limit\": 3,\n",
+ " },\n",
+ " },\n",
+ " {\n",
+ " \"name\": \"Brand-focused search (brand loyalty)\",\n",
+ " \"query\": \"sony products\",\n",
+ " \"params\": {\n",
+ " \"description_weight\": 0.5,\n",
+ " \"name_weight\": 0.3,\n",
+ " \"brand_weight\": 1.0, # Prioritize specific brand\n",
+ " \"category_weight\": 0.2,\n",
+ " \"price_weight\": 0.2,\n",
+ " \"rating_weight\": 0.4,\n",
+ " \"limit\": 3,\n",
+ " },\n",
+ " },\n",
+ " ]\n",
+ "\n",
+ " for scenario in scenarios:\n",
+ " print(f\"\\n--- {scenario['name']} ---\")\n",
+ " print(f\"Query: '{scenario['query']}'\")\n",
+ "\n",
+ " results = retriever.invoke(scenario[\"query\"], **scenario[\"params\"])\n",
+ "\n",
+ " for i, doc in enumerate(results, 1):\n",
+ " metadata = doc.metadata\n",
+ " print(\n",
+    "            f\"  {i}. {metadata['name']} ({metadata['brand']}) - ${metadata['price']} - ★{metadata['rating']}\"\n",
+ " )\n",
+ "\n",
+ " print()\n",
+ "\n",
+ "\n",
+ "def example_4_news_article_search():\n",
+ " \"\"\"\n",
+ " Example 4: News article search with sentiment and topics\n",
+ " Use case: News search with content, sentiment, topic categorization, and recency\n",
+ " \"\"\"\n",
+ " print(\"=== Example 4: News Article Search ===\")\n",
+ "\n",
+ " # 1. Define Schema\n",
+ " class NewsArticleSchema(sl.Schema):\n",
+ " id: sl.IdField\n",
+ " headline: sl.String\n",
+ " content: sl.String\n",
+ " topic: sl.String\n",
+ " sentiment_score: sl.Float # -1 (negative) to 1 (positive)\n",
+ " published_at: sl.Timestamp\n",
+ " source: sl.String\n",
+ "\n",
+ " news_schema = NewsArticleSchema()\n",
+ "\n",
+ " # 2. Define Spaces\n",
+ " content_space = sl.TextSimilaritySpace(\n",
+ " text=news_schema.content, model=\"sentence-transformers/all-MiniLM-L6-v2\"\n",
+ " )\n",
+ "\n",
+ " headline_space = sl.TextSimilaritySpace(\n",
+ " text=news_schema.headline, model=\"sentence-transformers/all-MiniLM-L6-v2\"\n",
+ " )\n",
+ "\n",
+ " topic_space = sl.CategoricalSimilaritySpace(\n",
+ " category_input=news_schema.topic,\n",
+ " categories=[\n",
+ " \"technology\",\n",
+ " \"politics\",\n",
+ " \"business\",\n",
+ " \"sports\",\n",
+ " \"entertainment\",\n",
+ " \"science\",\n",
+ " ],\n",
+ " )\n",
+ "\n",
+ " source_space = sl.CategoricalSimilaritySpace(\n",
+ " category_input=news_schema.source,\n",
+ " categories=[\"Reuters\", \"BBC\", \"CNN\", \"TechCrunch\", \"Bloomberg\"],\n",
+ " )\n",
+ "\n",
+ " # Sentiment space (can be configured to prefer positive or negative news)\n",
+ " sentiment_space = sl.NumberSpace(\n",
+ " number=news_schema.sentiment_score,\n",
+ " min_value=-1.0,\n",
+ " max_value=1.0,\n",
+ " mode=sl.Mode.MAXIMUM, # Default to preferring positive news\n",
+ " )\n",
+ "\n",
+ " # Recency space\n",
+ " recency_space = sl.RecencySpace(\n",
+ " timestamp=news_schema.published_at,\n",
+ " period_time_list=[\n",
+ " sl.PeriodTime(timedelta(hours=6)), # Last 6 hours\n",
+ " sl.PeriodTime(timedelta(days=1)), # Last day\n",
+ " sl.PeriodTime(timedelta(days=7)), # Last week\n",
+ " ],\n",
+ " )\n",
+ "\n",
+ " # 3. Create Index\n",
+ " news_index = sl.Index(\n",
+ " [\n",
+ " content_space,\n",
+ " headline_space,\n",
+ " topic_space,\n",
+ " source_space,\n",
+ " sentiment_space,\n",
+ " recency_space,\n",
+ " ]\n",
+ " )\n",
+ "\n",
+ " # 4. Define Query\n",
+ " news_query = (\n",
+ " sl.Query(\n",
+ " news_index,\n",
+ " weights={\n",
+ " content_space: sl.Param(\"content_weight\"),\n",
+ " headline_space: sl.Param(\"headline_weight\"),\n",
+ " topic_space: sl.Param(\"topic_weight\"),\n",
+ " source_space: sl.Param(\"source_weight\"),\n",
+ " sentiment_space: sl.Param(\"sentiment_weight\"),\n",
+ " recency_space: sl.Param(\"recency_weight\"),\n",
+ " },\n",
+ " )\n",
+ " .find(news_schema)\n",
+ " .similar(content_space.text, sl.Param(\"query_text\"))\n",
+ " .select(\n",
+ " [\n",
+ " news_schema.headline,\n",
+ " news_schema.content,\n",
+ " news_schema.topic,\n",
+ " news_schema.sentiment_score,\n",
+ " news_schema.published_at,\n",
+ " news_schema.source,\n",
+ " ]\n",
+ " )\n",
+ " .limit(sl.Param(\"limit\"))\n",
+ " )\n",
+ "\n",
+ " # 5. Sample news data\n",
+ " # Convert datetime objects to unix timestamps (integers) as required by Timestamp schema field\n",
+ " news_articles = [\n",
+ " {\n",
+ " \"id\": \"news1\",\n",
+ " \"headline\": \"Major Breakthrough in AI Research Announced\",\n",
+ " \"content\": \"Scientists have developed a new artificial intelligence model that shows remarkable improvements in natural language understanding.\",\n",
+ " \"topic\": \"technology\",\n",
+ " \"sentiment_score\": 0.8,\n",
+ " \"published_at\": int((datetime.now() - timedelta(hours=2)).timestamp()),\n",
+ " \"source\": \"TechCrunch\",\n",
+ " },\n",
+ " {\n",
+ " \"id\": \"news2\",\n",
+ " \"headline\": \"Stock Market Faces Volatility Amid Economic Concerns\",\n",
+ " \"content\": \"Financial markets experienced significant fluctuations today as investors react to new economic data and policy announcements.\",\n",
+ " \"topic\": \"business\",\n",
+ " \"sentiment_score\": -0.3,\n",
+ " \"published_at\": int((datetime.now() - timedelta(hours=8)).timestamp()),\n",
+ " \"source\": \"Bloomberg\",\n",
+ " },\n",
+ " {\n",
+ " \"id\": \"news3\",\n",
+ " \"headline\": \"New Climate Research Shows Promising Results\",\n",
+ " \"content\": \"Recent studies indicate that innovative climate technologies are showing positive environmental impact and could help address climate change.\",\n",
+ " \"topic\": \"science\",\n",
+ " \"sentiment_score\": 0.6,\n",
+ " \"published_at\": int((datetime.now() - timedelta(hours=12)).timestamp()),\n",
+ " \"source\": \"Reuters\",\n",
+ " },\n",
+ " {\n",
+ " \"id\": \"news4\",\n",
+ " \"headline\": \"Tech Companies Report Strong Quarterly Earnings\",\n",
+ " \"content\": \"Several major technology companies exceeded expectations in their quarterly earnings reports, driven by AI and cloud computing growth.\",\n",
+ " \"topic\": \"technology\",\n",
+ " \"sentiment_score\": 0.7,\n",
+ " \"published_at\": int((datetime.now() - timedelta(hours=4)).timestamp()),\n",
+ " \"source\": \"CNN\",\n",
+ " },\n",
+ " ]\n",
+ "\n",
+ " # Create source and executor\n",
+ " source = sl.InMemorySource(schema=news_schema)\n",
+ " executor = sl.InMemoryExecutor(sources=[source], indices=[news_index])\n",
+ " app = executor.run()\n",
+ "\n",
+ " # Add data to the source after the app is running\n",
+ " source.put(news_articles)\n",
+ "\n",
+ " # 6. Create Retriever\n",
+ " retriever = SuperlinkedRetriever(\n",
+ " sl_client=app,\n",
+ " sl_query=news_query,\n",
+ " page_content_field=\"content\",\n",
+ " metadata_fields=[\n",
+ " \"headline\",\n",
+ " \"topic\",\n",
+ " \"sentiment_score\",\n",
+ " \"published_at\",\n",
+ " \"source\",\n",
+ " ],\n",
+ " )\n",
+ "\n",
+ " # 7. Demonstrate different news search strategies\n",
+    "    print(\"Query: 'artificial intelligence developments'\")\n",
+ "\n",
+ " # Recent technology news\n",
+ " results = retriever.invoke(\n",
+ " \"artificial intelligence developments\",\n",
+ " content_weight=0.8,\n",
+ " headline_weight=0.6,\n",
+ " topic_weight=0.4,\n",
+ " source_weight=0.2,\n",
+ " sentiment_weight=0.3,\n",
+ " recency_weight=1.0, # Prioritize recent news\n",
+ " limit=2,\n",
+ " )\n",
+ "\n",
+ " print(\"\\nRecent Technology News:\")\n",
+ " for i, doc in enumerate(results, 1):\n",
+ " metadata = doc.metadata\n",
+ " published_timestamp = metadata[\"published_at\"]\n",
+ " # Convert unix timestamp back to datetime for display calculation\n",
+ " published_time = datetime.fromtimestamp(published_timestamp)\n",
+ " hours_ago = (datetime.now() - published_time).total_seconds() / 3600\n",
+    "        sentiment = (\n",
+    "            \"Positive\"\n",
+    "            if metadata[\"sentiment_score\"] > 0\n",
+    "            else \"Negative\"\n",
+    "            if metadata[\"sentiment_score\"] < 0\n",
+    "            else \"Neutral\"\n",
+    "        )\n",
+ "\n",
+ " print(f\" {i}. {metadata['headline']}\")\n",
+ " print(f\" Source: {metadata['source']} | {sentiment} | {hours_ago:.1f}h ago\")\n",
+ "\n",
+ " print()\n",
+ "\n",
+ "\n",
+ "def demonstrate_langchain_integration():\n",
+ " \"\"\"\n",
+ " Example 5: Integration with LangChain RAG pipeline\n",
+ " Shows how to use the SuperlinkedRetriever in a complete RAG workflow\n",
+ " \"\"\"\n",
+ " print(\"=== Example 5: LangChain RAG Integration ===\")\n",
+ "\n",
+ " # This would typically be used with an actual LLM\n",
+ " # For demo purposes, we'll just show the retrieval part\n",
+ "\n",
+ " # Quick setup of a simple retriever\n",
+ " class FAQSchema(sl.Schema):\n",
+ " id: sl.IdField\n",
+ " question: sl.String\n",
+ " answer: sl.String\n",
+ " category: sl.String\n",
+ "\n",
+ " faq_schema = FAQSchema()\n",
+ "\n",
+ " text_space = sl.TextSimilaritySpace(\n",
+ " text=faq_schema.question, model=\"sentence-transformers/all-MiniLM-L6-v2\"\n",
+ " )\n",
+ "\n",
+ " category_space = sl.CategoricalSimilaritySpace(\n",
+ " category_input=faq_schema.category,\n",
+ " categories=[\"technical\", \"billing\", \"general\", \"account\"],\n",
+ " )\n",
+ "\n",
+ " faq_index = sl.Index([text_space, category_space])\n",
+ "\n",
+ " faq_query = (\n",
+ " sl.Query(\n",
+ " faq_index,\n",
+ " weights={\n",
+ " text_space: sl.Param(\"text_weight\"),\n",
+ " category_space: sl.Param(\"category_weight\"),\n",
+ " },\n",
+ " )\n",
+ " .find(faq_schema)\n",
+ " .similar(text_space.text, sl.Param(\"query_text\"))\n",
+ " .select([faq_schema.question, faq_schema.answer, faq_schema.category])\n",
+ " .limit(sl.Param(\"limit\"))\n",
+ " )\n",
+ "\n",
+ " # Sample FAQ data\n",
+ " faqs = [\n",
+ " {\n",
+ " \"id\": \"faq1\",\n",
+ " \"question\": \"How do I reset my password?\",\n",
+ " \"answer\": \"You can reset your password by clicking 'Forgot Password' on the login page and following the email instructions.\",\n",
+ " \"category\": \"account\",\n",
+ " },\n",
+ " {\n",
+ " \"id\": \"faq2\",\n",
+ " \"question\": \"Why is my API not working?\",\n",
+ " \"answer\": \"Check your API key, rate limits, and ensure you're using the correct endpoint URL.\",\n",
+ " \"category\": \"technical\",\n",
+ " },\n",
+ " {\n",
+ " \"id\": \"faq3\",\n",
+ " \"question\": \"How do I upgrade my subscription?\",\n",
+ " \"answer\": \"Visit the billing section in your account settings to upgrade your plan.\",\n",
+ " \"category\": \"billing\",\n",
+ " },\n",
+ " ]\n",
+ "\n",
+ " # Create source and executor\n",
+ " source = sl.InMemorySource(schema=faq_schema)\n",
+ " executor = sl.InMemoryExecutor(sources=[source], indices=[faq_index])\n",
+ " app = executor.run()\n",
+ "\n",
+ " # Add data to the source after the app is running\n",
+ " source.put(faqs)\n",
+ "\n",
+ " retriever = SuperlinkedRetriever(\n",
+ " sl_client=app,\n",
+ " sl_query=faq_query,\n",
+ " page_content_field=\"answer\",\n",
+ " metadata_fields=[\"question\", \"category\"],\n",
+ " )\n",
+ "\n",
+ " # Simulate a RAG query\n",
+ " user_question = \"I can't access the API\"\n",
+ "\n",
+ " print(f\"User Question: '{user_question}'\")\n",
+ " print(\"Retrieving relevant context...\")\n",
+ "\n",
+ " context_docs = retriever.invoke(\n",
+ " user_question, text_weight=1.0, category_weight=0.3, limit=2\n",
+ " )\n",
+ "\n",
+ " print(\"\\nRetrieved Context:\")\n",
+ " for i, doc in enumerate(context_docs, 1):\n",
+ " print(f\" {i}. Q: {doc.metadata['question']}\")\n",
+ " print(f\" A: {doc.page_content}\")\n",
+ " print(f\" Category: {doc.metadata['category']}\")\n",
+ "\n",
+ " print(\n",
+ " \"\\n[In a real RAG setup, this context would be passed to an LLM to generate a response]\"\n",
+ " )\n",
+ " print()\n",
+ "\n",
+ "\n",
+ "def example_6_qdrant_vector_database():\n",
+ " \"\"\"\n",
+ " Example 6: Same retriever with Qdrant vector database\n",
+ " Use case: Production deployment with persistent vector storage\n",
+ "\n",
+ " This demonstrates that SuperlinkedRetriever is vector database agnostic.\n",
+ " The SAME retriever code works with Qdrant (or Redis, MongoDB) by only\n",
+ " changing the executor configuration, not the retriever implementation.\n",
+ " \"\"\"\n",
+ " print(\"=== Example 6: Qdrant Vector Database ===\")\n",
+ "\n",
+ " # 1. Define Schema (IDENTICAL to Example 1)\n",
+ " class DocumentSchema(sl.Schema):\n",
+ " id: sl.IdField\n",
+ " content: sl.String\n",
+ "\n",
+ " doc_schema = DocumentSchema()\n",
+ "\n",
+ " # 2. Define Space and Index (IDENTICAL to Example 1)\n",
+ " text_space = sl.TextSimilaritySpace(\n",
+ " text=doc_schema.content, model=\"sentence-transformers/all-MiniLM-L6-v2\"\n",
+ " )\n",
+ "\n",
+ " doc_index = sl.Index([text_space])\n",
+ "\n",
+ " # 3. Define Query (IDENTICAL to Example 1)\n",
+ " query = (\n",
+ " sl.Query(doc_index)\n",
+ " .find(doc_schema)\n",
+ " .similar(text_space.text, sl.Param(\"query_text\"))\n",
+ " .select([doc_schema.content])\n",
+ " .limit(sl.Param(\"limit\"))\n",
+ " )\n",
+ "\n",
+ " # 4. Configure Qdrant Vector Database (ONLY DIFFERENCE!)\n",
+    "    print(\"Configuring Qdrant vector database...\")\n",
+ " try:\n",
+ " qdrant_vector_db = sl.QdrantVectorDatabase(\n",
+ " url=\"https://your-qdrant-cluster.qdrant.io\", # Replace with your Qdrant URL\n",
+ " api_key=\"your-api-key-here\", # Replace with your API key\n",
+ " default_query_limit=10,\n",
+ " vector_precision=sl.Precision.FLOAT16,\n",
+ " )\n",
+ " print(\"Qdrant configuration created (credentials needed for actual connection)\")\n",
+ " except Exception as e:\n",
+ " print(f\"Qdrant not configured (expected without credentials): {e}\")\n",
+ " print(\"Using in-memory fallback for demonstration...\")\n",
+ " qdrant_vector_db = None\n",
+ "\n",
+ " # 5. Set up data and app (SLIGHT DIFFERENCE - vector database parameter)\n",
+ " documents = [\n",
+ " {\n",
+ " \"id\": \"doc1\",\n",
+ " \"content\": \"Machine learning algorithms can process large datasets efficiently.\",\n",
+ " },\n",
+ " {\n",
+ " \"id\": \"doc2\",\n",
+ " \"content\": \"Natural language processing enables computers to understand human language.\",\n",
+ " },\n",
+ " {\n",
+ " \"id\": \"doc3\",\n",
+ " \"content\": \"Deep learning models require significant computational resources.\",\n",
+ " },\n",
+ " {\n",
+ " \"id\": \"doc4\",\n",
+ " \"content\": \"Data science combines statistics, programming, and domain expertise.\",\n",
+ " },\n",
+ " {\n",
+ " \"id\": \"doc5\",\n",
+ " \"content\": \"Artificial intelligence is transforming various industries.\",\n",
+ " },\n",
+ " ]\n",
+ "\n",
+ " # Create source and executor with Qdrant (or fallback to in-memory)\n",
+ " source = sl.InMemorySource(schema=doc_schema)\n",
+ "\n",
+ " if qdrant_vector_db:\n",
+ " # Production setup with Qdrant\n",
+ " executor = sl.InMemoryExecutor(\n",
+ " sources=[source],\n",
+ " indices=[doc_index],\n",
+ " vector_database=qdrant_vector_db, # This makes it use Qdrant!\n",
+ " )\n",
+ " storage_type = \"Qdrant (persistent)\"\n",
+ " else:\n",
+ " # Fallback to in-memory for demo\n",
+ " executor = sl.InMemoryExecutor(sources=[source], indices=[doc_index])\n",
+ " storage_type = \"In-Memory (fallback)\"\n",
+ "\n",
+ " app = executor.run()\n",
+ "\n",
+ " # Add data to the source after the app is running\n",
+ " source.put(documents)\n",
+ "\n",
+ " # 6. Create Retriever (IDENTICAL CODE!)\n",
+ " retriever = SuperlinkedRetriever(\n",
+ " sl_client=app, sl_query=query, page_content_field=\"content\"\n",
+ " )\n",
+ "\n",
+ " # 7. Use the retriever (IDENTICAL CODE!)\n",
+ " results = retriever.invoke(\"artificial intelligence and machine learning\", limit=3)\n",
+ "\n",
+ " print(f\"Vector Storage: {storage_type}\")\n",
+    "    print(\"Query: 'artificial intelligence and machine learning'\")\n",
+ " print(f\"Found {len(results)} documents:\")\n",
+ " for i, doc in enumerate(results, 1):\n",
+ " print(f\" {i}. {doc.page_content}\")\n",
+ "\n",
+ " print(\n",
+ " \"\\nKey Insight: Same SuperlinkedRetriever code works with any vector database!\"\n",
+ " )\n",
+ " print(\n",
+ " \"Only executor configuration changes, retriever implementation stays identical\"\n",
+ " )\n",
+    "    print(\"Switch between in-memory -> Qdrant -> Redis -> MongoDB without code changes\")\n",
+ " print()\n",
+ "\n",
+ "\n",
+ "def main():\n",
+ " \"\"\"\n",
+ " Run all examples to demonstrate the flexibility of SuperlinkedRetriever\n",
+ " \"\"\"\n",
+ " print(\"SuperlinkedRetriever Examples\")\n",
+ " print(\"=\" * 50)\n",
+ " print(\"This file demonstrates how the SuperlinkedRetriever can be used\")\n",
+ " print(\"with different space configurations for various use cases.\\n\")\n",
+ "\n",
+ " try:\n",
+ " example_1_simple_text_search()\n",
+ " example_2_multi_space_blog_search()\n",
+ " example_3_ecommerce_product_search()\n",
+ " example_4_news_article_search()\n",
+ " demonstrate_langchain_integration()\n",
+ " example_6_qdrant_vector_database()\n",
+ "\n",
+ " print(\"All examples completed successfully!\")\n",
+ "\n",
+ " except Exception as e:\n",
+ " print(f\"Error running examples: {e}\")\n",
+ " print(\"Make sure you have 'superlinked' package installed:\")\n",
+ " print(\"pip install superlinked\")"
+ ]
+ }
+ ],
+ "metadata": {
+ "language_info": {
+ "name": "python"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/docs/docs/integrations/retrievers/superlinked_examples.ipynb b/docs/docs/integrations/retrievers/superlinked_examples.ipynb
new file mode 100644
index 0000000000000..4d27243989a9a
--- /dev/null
+++ b/docs/docs/integrations/retrievers/superlinked_examples.ipynb
@@ -0,0 +1,204 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# SuperlinkedRetriever Examples\n",
+ "\n",
+    "This notebook demonstrates how to build a Superlinked App and Query Descriptor and use them with the LangChain `SuperlinkedRetriever`.\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Setup\n",
+ "\n",
+ "Install the integration and its peer dependency:\n",
+ "\n",
+ "```bash\n",
+ "pip install -U langchain-superlinked superlinked\n",
+ "```\n",
+ "\n",
+ "## Instantiation\n",
+ "\n",
+ "See below for creating a Superlinked App (`sl_client`) and a `QueryDescriptor` (`sl_query`), then wiring them into `SuperlinkedRetriever`.\n",
+ "\n",
+ "## Usage\n",
+ "\n",
+ "Call `retriever.invoke(query_text, **params)` to retrieve `Document` objects. Examples below show single-space and multi-space setups.\n",
+ "\n",
+ "## Use within a chain\n",
+ "\n",
+ "The retriever can be used in LangChain chains by piping it into your prompt and model. See the main Superlinked retriever page for a full RAG example.\n",
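+    "\n",
+    "A minimal sketch of such a chain (it assumes the `retriever` built in the next cell and the `langchain-openai` package; any chat model can be substituted):\n",
+    "\n",
+    "```python\n",
+    "from langchain_core.output_parsers import StrOutputParser\n",
+    "from langchain_core.prompts import ChatPromptTemplate\n",
+    "from langchain_core.runnables import RunnablePassthrough\n",
+    "from langchain_openai import ChatOpenAI\n",
+    "\n",
+    "prompt = ChatPromptTemplate.from_template(\n",
+    "    \"Answer based on context:\\n\\nContext: {context}\\nQuestion: {question}\"\n",
+    ")\n",
+    "\n",
+    "\n",
+    "def format_docs(docs):\n",
+    "    return \"\\n\\n\".join(doc.page_content for doc in docs)\n",
+    "\n",
+    "\n",
+    "chain = (\n",
+    "    {\"context\": retriever | format_docs, \"question\": RunnablePassthrough()}\n",
+    "    | prompt\n",
+    "    | ChatOpenAI(model=\"gpt-4o-mini\")\n",
+    "    | StrOutputParser()\n",
+    ")\n",
+    "\n",
+    "chain.invoke(\"How does machine learning work?\")\n",
+    "```\n",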
+ "\n",
+ "## API reference\n",
+ "\n",
+ "Refer to the API docs:\n",
+ "\n",
+ "- https://python.langchain.com/api_reference/superlinked/retrievers/langchain_superlinked.retrievers.SuperlinkedRetriever.html\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import superlinked.framework as sl\n",
+ "from langchain_superlinked import SuperlinkedRetriever\n",
+ "from datetime import timedelta\n",
+ "\n",
+ "\n",
+ "# Define schema\n",
+ "class DocumentSchema(sl.Schema):\n",
+ " id: sl.IdField\n",
+ " content: sl.String\n",
+ "\n",
+ "\n",
+ "doc_schema = DocumentSchema()\n",
+ "\n",
+ "# Space + index\n",
+ "text_space = sl.TextSimilaritySpace(\n",
+ " text=doc_schema.content, model=\"sentence-transformers/all-MiniLM-L6-v2\"\n",
+ ")\n",
+ "doc_index = sl.Index([text_space])\n",
+ "\n",
+ "# Query descriptor\n",
+ "query = (\n",
+ " sl.Query(doc_index)\n",
+ " .find(doc_schema)\n",
+ " .similar(text_space.text, sl.Param(\"query_text\"))\n",
+ " .select([doc_schema.content])\n",
+ " .limit(sl.Param(\"limit\"))\n",
+ ")\n",
+ "\n",
+ "# Minimal app\n",
+ "source = sl.InMemorySource(schema=doc_schema)\n",
+ "executor = sl.InMemoryExecutor(sources=[source], indices=[doc_index])\n",
+ "app = executor.run()\n",
+ "\n",
+ "# Data\n",
+ "source.put(\n",
+ " [\n",
+ " {\"id\": \"1\", \"content\": \"Machine learning algorithms process data efficiently.\"},\n",
+ " {\n",
+ " \"id\": \"2\",\n",
+ " \"content\": \"Natural language processing understands human language.\",\n",
+ " },\n",
+ " {\"id\": \"3\", \"content\": \"Deep learning models require significant compute.\"},\n",
+ " ]\n",
+ ")\n",
+ "\n",
+ "# Retriever\n",
+ "retriever = SuperlinkedRetriever(\n",
+ " sl_client=app, sl_query=query, page_content_field=\"content\"\n",
+ ")\n",
+ "\n",
+ "retriever.invoke(\"artificial intelligence\", limit=2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Multi-space example (blog posts)\n",
+ "class BlogPostSchema(sl.Schema):\n",
+ " id: sl.IdField\n",
+ " title: sl.String\n",
+ " content: sl.String\n",
+ " category: sl.String\n",
+ " published_date: sl.Timestamp\n",
+ "\n",
+ "\n",
+ "blog = BlogPostSchema()\n",
+ "\n",
+ "content_space = sl.TextSimilaritySpace(\n",
+ " text=blog.content, model=\"sentence-transformers/all-MiniLM-L6-v2\"\n",
+ ")\n",
+ "title_space = sl.TextSimilaritySpace(\n",
+ " text=blog.title, model=\"sentence-transformers/all-MiniLM-L6-v2\"\n",
+ ")\n",
+ "cat_space = sl.CategoricalSimilaritySpace(\n",
+ " category_input=blog.category, categories=[\"technology\", \"science\", \"business\"]\n",
+ ")\n",
+ "recency_space = sl.RecencySpace(\n",
+ " timestamp=blog.published_date,\n",
+ " period_time_list=[\n",
+ " sl.PeriodTime(timedelta(days=30)),\n",
+ " sl.PeriodTime(timedelta(days=90)),\n",
+ " ],\n",
+ ")\n",
+ "\n",
+ "blog_index = sl.Index([content_space, title_space, cat_space, recency_space])\n",
+ "\n",
+ "blog_query = (\n",
+ " sl.Query(\n",
+ " blog_index,\n",
+ " weights={\n",
+ " content_space: sl.Param(\"content_weight\"),\n",
+ " title_space: sl.Param(\"title_weight\"),\n",
+ " cat_space: sl.Param(\"category_weight\"),\n",
+ " recency_space: sl.Param(\"recency_weight\"),\n",
+ " },\n",
+ " )\n",
+ " .find(blog)\n",
+ " .similar(content_space.text, sl.Param(\"query_text\"))\n",
+ " .select([blog.title, blog.content, blog.category, blog.published_date])\n",
+ " .limit(sl.Param(\"limit\"))\n",
+ ")\n",
+ "\n",
+ "source = sl.InMemorySource(schema=blog)\n",
+ "app = sl.InMemoryExecutor(sources=[source], indices=[blog_index]).run()\n",
+ "\n",
+ "from datetime import datetime\n",
+ "\n",
+ "source.put(\n",
+ " [\n",
+ " {\n",
+ " \"id\": \"p1\",\n",
+ " \"title\": \"Intro to ML\",\n",
+ " \"content\": \"Machine learning 101\",\n",
+ " \"category\": \"technology\",\n",
+ " \"published_date\": int((datetime.now() - timedelta(days=5)).timestamp()),\n",
+ " },\n",
+ " {\n",
+ " \"id\": \"p2\",\n",
+ " \"title\": \"AI in Healthcare\",\n",
+ " \"content\": \"Transforming diagnosis\",\n",
+ " \"category\": \"science\",\n",
+ " \"published_date\": int((datetime.now() - timedelta(days=15)).timestamp()),\n",
+ " },\n",
+ " ]\n",
+ ")\n",
+ "\n",
+ "blog_retriever = SuperlinkedRetriever(\n",
+ " sl_client=app,\n",
+ " sl_query=blog_query,\n",
+ " page_content_field=\"content\",\n",
+ " metadata_fields=[\"title\", \"category\", \"published_date\"],\n",
+ ")\n",
+ "\n",
+ "blog_retriever.invoke(\n",
+ " \"machine learning\", content_weight=1.0, recency_weight=0.5, limit=2\n",
+ ")"
+ ]
+ }
+ ],
+ "metadata": {
+ "language_info": {
+ "name": "python"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/docs/docs/integrations/retrievers/superlinked_examples.py b/docs/docs/integrations/retrievers/superlinked_examples.py
new file mode 100644
index 0000000000000..7c0d2bf8337a4
--- /dev/null
+++ b/docs/docs/integrations/retrievers/superlinked_examples.py
@@ -0,0 +1,954 @@
+"""
+SuperlinkedRetriever Usage Examples
+
+This file demonstrates how to use the SuperlinkedRetriever with different
+space configurations to showcase its flexibility across various use cases.
+"""
+# ruff: noqa: T201, E501
+# mypy: ignore-errors
+
+from datetime import datetime, timedelta
+
+import superlinked.framework as sl
+
+from langchain_superlinked import SuperlinkedRetriever
+
+
+def example_1_simple_text_search():
+ """
+ Example 1: Simple text-based semantic search
+ Use case: Basic document retrieval based on content similarity
+ """
+ print("=== Example 1: Simple Text Search ===")
+
+ # 1. Define Schema
+ class DocumentSchema(sl.Schema):
+ id: sl.IdField
+ content: sl.String
+
+ doc_schema = DocumentSchema()
+
+ # 2. Define Space and Index
+ text_space = sl.TextSimilaritySpace(
+ text=doc_schema.content, model="sentence-transformers/all-MiniLM-L6-v2"
+ )
+
+ doc_index = sl.Index([text_space])
+
+ # 3. Define Query
+ query = (
+ sl.Query(doc_index)
+ .find(doc_schema)
+ .similar(text_space.text, sl.Param("query_text"))
+ .select([doc_schema.content])
+ .limit(sl.Param("limit"))
+ )
+
+ # 4. Set up data and app using executor pattern
+ documents = [
+ {
+ "id": "doc1",
+ "content": "Machine learning algorithms can process large datasets efficiently.",
+ },
+ {
+ "id": "doc2",
+ "content": "Natural language processing enables computers to understand human language.",
+ },
+ {
+ "id": "doc3",
+ "content": "Deep learning models require significant computational resources.",
+ },
+ {
+ "id": "doc4",
+ "content": "Data science combines statistics, programming, and domain expertise.",
+ },
+ {
+ "id": "doc5",
+ "content": "Artificial intelligence is transforming various industries.",
+ },
+ ]
+
+ # Create source and executor
+ source = sl.InMemorySource(schema=doc_schema)
+ executor = sl.InMemoryExecutor(sources=[source], indices=[doc_index])
+ app = executor.run()
+
+ # Add data to the source after the app is running
+ source.put(documents)
+
+ # 5. Create Retriever
+ retriever = SuperlinkedRetriever(
+ sl_client=app, sl_query=query, page_content_field="content"
+ )
+
+ # 6. Use the retriever
+ results = retriever.invoke("artificial intelligence and machine learning", limit=3)
+
+ print("Query: 'artificial intelligence and machine learning'")
+ print(f"Found {len(results)} documents:")
+ for i, doc in enumerate(results, 1):
+ print(f" {i}. {doc.page_content}")
+ print()
+
+
+def example_2_multi_space_blog_search():
+ """
+ Example 2: Multi-space blog post search
+ Use case: Blog search with content, category, and recency
+ """
+ print("=== Example 2: Multi-Space Blog Search ===")
+
+ # 1. Define Schema
+ class BlogPostSchema(sl.Schema):
+ id: sl.IdField
+ title: sl.String
+ content: sl.String
+ category: sl.String
+ published_date: sl.Timestamp
+ view_count: sl.Integer
+
+ blog_schema = BlogPostSchema()
+
+ # 2. Define Multiple Spaces
+ # Text similarity for content
+ content_space = sl.TextSimilaritySpace(
+ text=blog_schema.content, model="sentence-transformers/all-MiniLM-L6-v2"
+ )
+
+ # Title similarity
+ title_space = sl.TextSimilaritySpace(
+ text=blog_schema.title, model="sentence-transformers/all-MiniLM-L6-v2"
+ )
+
+ # Category similarity
+ category_space = sl.CategoricalSimilaritySpace(
+ category_input=blog_schema.category,
+ categories=["technology", "science", "business", "health", "travel"],
+ )
+
+ # Recency (favor recent posts)
+ recency_space = sl.RecencySpace(
+ timestamp=blog_schema.published_date,
+ period_time_list=[
+ sl.PeriodTime(timedelta(days=30)), # Last month
+ sl.PeriodTime(timedelta(days=90)), # Last 3 months
+ sl.PeriodTime(timedelta(days=365)), # Last year
+ ],
+ )
+
+ # Popularity (based on view count)
+ popularity_space = sl.NumberSpace(
+ number=blog_schema.view_count,
+ min_value=0,
+ max_value=10000,
+ mode=sl.Mode.MAXIMUM,
+ )
+
+ # 3. Create Index
+ blog_index = sl.Index(
+ [content_space, title_space, category_space, recency_space, popularity_space]
+ )
+
+ # 4. Define Query with multiple weighted spaces
+ blog_query = (
+ sl.Query(
+ blog_index,
+ weights={
+ content_space: sl.Param("content_weight"),
+ title_space: sl.Param("title_weight"),
+ category_space: sl.Param("category_weight"),
+ recency_space: sl.Param("recency_weight"),
+ popularity_space: sl.Param("popularity_weight"),
+ },
+ )
+ .find(blog_schema)
+ .similar(content_space.text, sl.Param("query_text"))
+ .select(
+ [
+ blog_schema.title,
+ blog_schema.content,
+ blog_schema.category,
+ blog_schema.published_date,
+ blog_schema.view_count,
+ ]
+ )
+ .limit(sl.Param("limit"))
+ )
+
+ # 5. Sample blog data
+ from datetime import datetime
+
+ # Convert datetime objects to unix timestamps (integers) as required by Timestamp schema field
+ blog_posts = [
+ {
+ "id": "post1",
+ "title": "Introduction to Machine Learning",
+ "content": "Machine learning is revolutionizing how we process data and make predictions.",
+ "category": "technology",
+ "published_date": int((datetime.now() - timedelta(days=5)).timestamp()),
+ "view_count": 1500,
+ },
+ {
+ "id": "post2",
+ "title": "The Future of AI in Healthcare",
+ "content": "Artificial intelligence is transforming medical diagnosis and treatment.",
+ "category": "health",
+ "published_date": int((datetime.now() - timedelta(days=15)).timestamp()),
+ "view_count": 2300,
+ },
+ {
+ "id": "post3",
+ "title": "Business Analytics with Python",
+ "content": "Learn how to use Python for business data analysis and visualization.",
+ "category": "business",
+ "published_date": int((datetime.now() - timedelta(days=45)).timestamp()),
+ "view_count": 980,
+ },
+ {
+ "id": "post4",
+ "title": "Deep Learning Neural Networks",
+ "content": "Understanding neural networks and their applications in modern AI.",
+ "category": "technology",
+ "published_date": int((datetime.now() - timedelta(days=2)).timestamp()),
+ "view_count": 3200,
+ },
+ ]
+
+ # Create source and executor
+ source = sl.InMemorySource(schema=blog_schema)
+ executor = sl.InMemoryExecutor(sources=[source], indices=[blog_index])
+ app = executor.run()
+
+ # Add data to the source after the app is running
+ source.put(blog_posts)
+
+ # 6. Create Retriever
+ retriever = SuperlinkedRetriever(
+ sl_client=app,
+ sl_query=blog_query,
+ page_content_field="content",
+ metadata_fields=["title", "category", "published_date", "view_count"],
+ )
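+ # Each returned Document carries the post body as page_content and the selected
+ # fields (title, category, published_date, view_count) in doc.metadata, which the
+ # scenarios below read when printing results.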
+
+ # 7. Demonstrate different weighting strategies
+ scenarios = [
+ {
+ "name": "Content-focused search",
+ "params": {
+ "content_weight": 1.0,
+ "title_weight": 0.3,
+ "category_weight": 0.1,
+ "recency_weight": 0.2,
+ "popularity_weight": 0.1,
+ "limit": 3,
+ },
+ },
+ {
+ "name": "Recent posts prioritized",
+ "params": {
+ "content_weight": 0.5,
+ "title_weight": 0.2,
+ "category_weight": 0.1,
+ "recency_weight": 1.0,
+ "popularity_weight": 0.1,
+ "limit": 3,
+ },
+ },
+ {
+ "name": "Popular posts with category emphasis",
+ "params": {
+ "content_weight": 0.6,
+ "title_weight": 0.3,
+ "category_weight": 0.8,
+ "recency_weight": 0.3,
+ "popularity_weight": 0.9,
+ "limit": 3,
+ },
+ },
+ ]
+
+ query_text = "machine learning and AI applications"
+
+ for scenario in scenarios:
+ print(f"\n--- {scenario['name']} ---")
+ print(f"Query: '{query_text}'")
+
+ results = retriever.invoke(query_text, **scenario["params"])
+
+ for i, doc in enumerate(results, 1):
+ print(
+ f" {i}. {doc.metadata['title']} (Category: {doc.metadata['category']}, Views: {doc.metadata['view_count']})"
+ )
+
+ print()
+
+
+def example_3_ecommerce_product_search():
+ """
+ Example 3: E-commerce product search
+ Use case: Product search with price range, brand preference, and ratings
+ """
+ print("=== Example 3: E-commerce Product Search ===")
+
+ # 1. Define Schema
+ class ProductSchema(sl.Schema):
+ id: sl.IdField
+ name: sl.String
+ description: sl.String
+ brand: sl.String
+ price: sl.Float
+ rating: sl.Float
+ category: sl.String
+
+ product_schema = ProductSchema()
+
+ # 2. Define Spaces
+ description_space = sl.TextSimilaritySpace(
+ text=product_schema.description, model="sentence-transformers/all-MiniLM-L6-v2"
+ )
+
+ name_space = sl.TextSimilaritySpace(
+ text=product_schema.name, model="sentence-transformers/all-MiniLM-L6-v2"
+ )
+
+ brand_space = sl.CategoricalSimilaritySpace(
+ category_input=product_schema.brand,
+ categories=["Apple", "Samsung", "Sony", "Nike", "Adidas", "Canon"],
+ )
+
+ category_space = sl.CategoricalSimilaritySpace(
+ category_input=product_schema.category,
+ categories=["electronics", "clothing", "sports", "photography"],
+ )
+
+ # Price space (lower prices get higher scores in MINIMUM mode)
+ price_space = sl.NumberSpace(
+ number=product_schema.price,
+ min_value=10.0,
+ max_value=2000.0,
+ mode=sl.Mode.MINIMUM, # Favor lower prices
+ )
+
+ # Rating space (higher ratings get higher scores)
+ rating_space = sl.NumberSpace(
+ number=product_schema.rating,
+ min_value=1.0,
+ max_value=5.0,
+ mode=sl.Mode.MAXIMUM, # Favor higher ratings
+ )
+
+ # 3. Create Index
+ product_index = sl.Index(
+ [
+ description_space,
+ name_space,
+ brand_space,
+ category_space,
+ price_space,
+ rating_space,
+ ]
+ )
+
+ # 4. Define Query
+ product_query = (
+ sl.Query(
+ product_index,
+ weights={
+ description_space: sl.Param("description_weight"),
+ name_space: sl.Param("name_weight"),
+ brand_space: sl.Param("brand_weight"),
+ category_space: sl.Param("category_weight"),
+ price_space: sl.Param("price_weight"),
+ rating_space: sl.Param("rating_weight"),
+ },
+ )
+ .find(product_schema)
+ .similar(description_space.text, sl.Param("query_text"))
+ .select(
+ [
+ product_schema.name,
+ product_schema.description,
+ product_schema.brand,
+ product_schema.price,
+ product_schema.rating,
+ product_schema.category,
+ ]
+ )
+ .limit(sl.Param("limit"))
+ )
+
+ # 5. Sample product data
+ products = [
+ {
+ "id": "prod1",
+ "name": "Wireless Bluetooth Headphones",
+ "description": "High-quality wireless headphones with noise cancellation and long battery life.",
+ "brand": "Sony",
+ "price": 299.99,
+ "rating": 4.5,
+ "category": "electronics",
+ },
+ {
+ "id": "prod2",
+ "name": "Professional DSLR Camera",
+ "description": "Full-frame DSLR camera perfect for professional photography and videography.",
+ "brand": "Canon",
+ "price": 1299.99,
+ "rating": 4.8,
+ "category": "photography",
+ },
+ {
+ "id": "prod3",
+ "name": "Running Shoes",
+ "description": "Comfortable running shoes with excellent cushioning and support for athletes.",
+ "brand": "Nike",
+ "price": 129.99,
+ "rating": 4.3,
+ "category": "sports",
+ },
+ {
+ "id": "prod4",
+ "name": "Smartphone with 5G",
+ "description": "Latest smartphone with 5G connectivity, advanced camera, and all-day battery.",
+ "brand": "Samsung",
+ "price": 899.99,
+ "rating": 4.6,
+ "category": "electronics",
+ },
+ {
+ "id": "prod5",
+ "name": "Bluetooth Speaker",
+ "description": "Portable Bluetooth speaker with waterproof design and rich sound quality.",
+ "brand": "Sony",
+ "price": 79.99,
+ "rating": 4.2,
+ "category": "electronics",
+ },
+ ]
+
+ # Create source and executor
+ source = sl.InMemorySource(schema=product_schema)
+ executor = sl.InMemoryExecutor(sources=[source], indices=[product_index])
+ app = executor.run()
+
+ # Add data to the source after the app is running
+ source.put(products)
+
+ # 6. Create Retriever
+ retriever = SuperlinkedRetriever(
+ sl_client=app,
+ sl_query=product_query,
+ page_content_field="description",
+ metadata_fields=["name", "brand", "price", "rating", "category"],
+ )
+
+ # 7. Demonstrate different search strategies
+ scenarios = [
+ {
+ "name": "Quality-focused search (high ratings matter most)",
+ "query": "wireless audio device",
+ "params": {
+ "description_weight": 0.7,
+ "name_weight": 0.5,
+ "brand_weight": 0.2,
+ "category_weight": 0.3,
+ "price_weight": 0.1,
+ "rating_weight": 1.0,
+ "limit": 3,
+ },
+ },
+ {
+ "name": "Budget-conscious search (price matters most)",
+ "query": "electronics device",
+ "params": {
+ "description_weight": 0.6,
+ "name_weight": 0.4,
+ "brand_weight": 0.1,
+ "category_weight": 0.2,
+ "price_weight": 1.0,
+ "rating_weight": 0.3,
+ "limit": 3,
+ },
+ },
+ {
+ "name": "Brand-focused search (brand loyalty)",
+ "query": "sony products",
+ "params": {
+ "description_weight": 0.5,
+ "name_weight": 0.3,
+ "brand_weight": 1.0,
+ "category_weight": 0.2,
+ "price_weight": 0.2,
+ "rating_weight": 0.4,
+ "limit": 3,
+ },
+ },
+ ]
+
+ for scenario in scenarios:
+ print(f"\n--- {scenario['name']} ---")
+ print(f"Query: '{scenario['query']}'")
+
+ results = retriever.invoke(scenario["query"], **scenario["params"])
+
+ for i, doc in enumerate(results, 1):
+ metadata = doc.metadata
+ print(
+ f" {i}. {metadata['name']} ({metadata['brand']}) - ${metadata['price']} - rating {metadata['rating']}"
+ )
+
+ print()
+
+
+def example_4_news_article_search():
+ """
+ Example 4: News article search with sentiment and topics
+ Use case: News search with content, sentiment, topic categorization, and recency
+ """
+ print("=== Example 4: News Article Search ===")
+
+ # 1. Define Schema
+ class NewsArticleSchema(sl.Schema):
+ id: sl.IdField
+ headline: sl.String
+ content: sl.String
+ topic: sl.String
+ sentiment_score: sl.Float # -1 (negative) to 1 (positive)
+ published_at: sl.Timestamp
+ source: sl.String
+
+ news_schema = NewsArticleSchema()
+
+ # 2. Define Spaces
+ content_space = sl.TextSimilaritySpace(
+ text=news_schema.content, model="sentence-transformers/all-MiniLM-L6-v2"
+ )
+
+ headline_space = sl.TextSimilaritySpace(
+ text=news_schema.headline, model="sentence-transformers/all-MiniLM-L6-v2"
+ )
+
+ topic_space = sl.CategoricalSimilaritySpace(
+ category_input=news_schema.topic,
+ categories=[
+ "technology",
+ "politics",
+ "business",
+ "sports",
+ "entertainment",
+ "science",
+ ],
+ )
+
+ source_space = sl.CategoricalSimilaritySpace(
+ category_input=news_schema.source,
+ categories=["Reuters", "BBC", "CNN", "TechCrunch", "Bloomberg"],
+ )
+
+ # Sentiment space (can be configured to prefer positive or negative news)
+ sentiment_space = sl.NumberSpace(
+ number=news_schema.sentiment_score,
+ min_value=-1.0,
+ max_value=1.0,
+ mode=sl.Mode.MAXIMUM, # Default to preferring positive news
+ )
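+ # (Switching mode to sl.Mode.MINIMUM would instead favor negative-sentiment articles.)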
+
+ # Recency space
+ recency_space = sl.RecencySpace(
+ timestamp=news_schema.published_at,
+ period_time_list=[
+ sl.PeriodTime(timedelta(hours=6)), # Last 6 hours
+ sl.PeriodTime(timedelta(days=1)), # Last day
+ sl.PeriodTime(timedelta(days=7)), # Last week
+ ],
+ )
+
+ # 3. Create Index
+ news_index = sl.Index(
+ [
+ content_space,
+ headline_space,
+ topic_space,
+ source_space,
+ sentiment_space,
+ recency_space,
+ ]
+ )
+
+ # 4. Define Query
+ news_query = (
+ sl.Query(
+ news_index,
+ weights={
+ content_space: sl.Param("content_weight"),
+ headline_space: sl.Param("headline_weight"),
+ topic_space: sl.Param("topic_weight"),
+ source_space: sl.Param("source_weight"),
+ sentiment_space: sl.Param("sentiment_weight"),
+ recency_space: sl.Param("recency_weight"),
+ },
+ )
+ .find(news_schema)
+ .similar(content_space.text, sl.Param("query_text"))
+ .select(
+ [
+ news_schema.headline,
+ news_schema.content,
+ news_schema.topic,
+ news_schema.sentiment_score,
+ news_schema.published_at,
+ news_schema.source,
+ ]
+ )
+ .limit(sl.Param("limit"))
+ )
+
+ # 5. Sample news data
+ # Convert datetime objects to unix timestamps (integers) as required by Timestamp schema field
+ news_articles = [
+ {
+ "id": "news1",
+ "headline": "Major Breakthrough in AI Research Announced",
+ "content": "Scientists have developed a new artificial intelligence model that shows remarkable improvements in natural language understanding.",
+ "topic": "technology",
+ "sentiment_score": 0.8,
+ "published_at": int((datetime.now() - timedelta(hours=2)).timestamp()),
+ "source": "TechCrunch",
+ },
+ {
+ "id": "news2",
+ "headline": "Stock Market Faces Volatility Amid Economic Concerns",
+ "content": "Financial markets experienced significant fluctuations today as investors react to new economic data and policy announcements.",
+ "topic": "business",
+ "sentiment_score": -0.3,
+ "published_at": int((datetime.now() - timedelta(hours=8)).timestamp()),
+ "source": "Bloomberg",
+ },
+ {
+ "id": "news3",
+ "headline": "New Climate Research Shows Promising Results",
+ "content": "Recent studies indicate that innovative climate technologies are showing positive environmental impact and could help address climate change.",
+ "topic": "science",
+ "sentiment_score": 0.6,
+ "published_at": int((datetime.now() - timedelta(hours=12)).timestamp()),
+ "source": "Reuters",
+ },
+ {
+ "id": "news4",
+ "headline": "Tech Companies Report Strong Quarterly Earnings",
+ "content": "Several major technology companies exceeded expectations in their quarterly earnings reports, driven by AI and cloud computing growth.",
+ "topic": "technology",
+ "sentiment_score": 0.7,
+ "published_at": int((datetime.now() - timedelta(hours=4)).timestamp()),
+ "source": "CNN",
+ },
+ ]
+
+ # Create source and executor
+ source = sl.InMemorySource(schema=news_schema)
+ executor = sl.InMemoryExecutor(sources=[source], indices=[news_index])
+ app = executor.run()
+
+ # Add data to the source after the app is running
+ source.put(news_articles)
+
+ # 6. Create Retriever
+ retriever = SuperlinkedRetriever(
+ sl_client=app,
+ sl_query=news_query,
+ page_content_field="content",
+ metadata_fields=[
+ "headline",
+ "topic",
+ "sentiment_score",
+ "published_at",
+ "source",
+ ],
+ )
+
+ # 7. Demonstrate different news search strategies
+ print("Query: 'artificial intelligence developments'")
+
+ # Recent technology news
+ results = retriever.invoke(
+ "artificial intelligence developments",
+ content_weight=0.8,
+ headline_weight=0.6,
+ topic_weight=0.4,
+ source_weight=0.2,
+ sentiment_weight=0.3,
+ recency_weight=1.0, # Prioritize recent news
+ limit=2,
+ )
+
+ print("\nRecent Technology News:")
+ for i, doc in enumerate(results, 1):
+ metadata = doc.metadata
+ published_timestamp = metadata["published_at"]
+ # Convert unix timestamp back to datetime for display calculation
+ published_time = datetime.fromtimestamp(published_timestamp)
+ hours_ago = (datetime.now() - published_time).total_seconds() / 3600
+ sentiment = (
+ "Positive"
+ if metadata["sentiment_score"] > 0
+ else "Negative"
+ if metadata["sentiment_score"] < 0
+ else "Neutral"
+ )
+
+ print(f" {i}. {metadata['headline']}")
+ print(f" Source: {metadata['source']} | {sentiment} | {hours_ago:.1f}h ago")
+
+ print()
+
+
+def demonstrate_langchain_integration():
+ """
+ Example 5: Integration with LangChain RAG pipeline
+ Shows how to use the SuperlinkedRetriever in a complete RAG workflow
+ """
+ print("=== Example 5: LangChain RAG Integration ===")
+
+ # This would typically be used with an actual LLM
+ # For demo purposes, we'll just show the retrieval part
+
+ # Quick setup of a simple retriever
+ class FAQSchema(sl.Schema):
+ id: sl.IdField
+ question: sl.String
+ answer: sl.String
+ category: sl.String
+
+ faq_schema = FAQSchema()
+
+ text_space = sl.TextSimilaritySpace(
+ text=faq_schema.question, model="sentence-transformers/all-MiniLM-L6-v2"
+ )
+
+ category_space = sl.CategoricalSimilaritySpace(
+ category_input=faq_schema.category,
+ categories=["technical", "billing", "general", "account"],
+ )
+
+ faq_index = sl.Index([text_space, category_space])
+
+ faq_query = (
+ sl.Query(
+ faq_index,
+ weights={
+ text_space: sl.Param("text_weight"),
+ category_space: sl.Param("category_weight"),
+ },
+ )
+ .find(faq_schema)
+ .similar(text_space.text, sl.Param("query_text"))
+ .select([faq_schema.question, faq_schema.answer, faq_schema.category])
+ .limit(sl.Param("limit"))
+ )
+
+ # Sample FAQ data
+ faqs = [
+ {
+ "id": "faq1",
+ "question": "How do I reset my password?",
+ "answer": "You can reset your password by clicking 'Forgot Password' on the login page and following the email instructions.",
+ "category": "account",
+ },
+ {
+ "id": "faq2",
+ "question": "Why is my API not working?",
+ "answer": "Check your API key, rate limits, and ensure you're using the correct endpoint URL.",
+ "category": "technical",
+ },
+ {
+ "id": "faq3",
+ "question": "How do I upgrade my subscription?",
+ "answer": "Visit the billing section in your account settings to upgrade your plan.",
+ "category": "billing",
+ },
+ ]
+
+ # Create source and executor
+ source = sl.InMemorySource(schema=faq_schema)
+ executor = sl.InMemoryExecutor(sources=[source], indices=[faq_index])
+ app = executor.run()
+
+ # Add data to the source after the app is running
+ source.put(faqs)
+
+ retriever = SuperlinkedRetriever(
+ sl_client=app,
+ sl_query=faq_query,
+ page_content_field="answer",
+ metadata_fields=["question", "category"],
+ )
+
+ # Simulate a RAG query
+ user_question = "I can't access the API"
+
+ print(f"User Question: '{user_question}'")
+ print("Retrieving relevant context...")
+
+ context_docs = retriever.invoke(
+ user_question, text_weight=1.0, category_weight=0.3, limit=2
+ )
+
+ print("\nRetrieved Context:")
+ for i, doc in enumerate(context_docs, 1):
+ print(f" {i}. Q: {doc.metadata['question']}")
+ print(f" A: {doc.page_content}")
+ print(f" Category: {doc.metadata['category']}")
+
+ print(
+ "\n[In a real RAG setup, this context would be passed to an LLM to generate a response]"
+ )
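+
+ # Illustrative sketch only: stitch the retrieved answers into a single context
+ # string, the way a prompt template would consume them (the LLM call is omitted).
+ context_text = "\n\n".join(doc.page_content for doc in context_docs)
+ print(f"(Assembled {len(context_text)} characters of context for the prompt)")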
+ print()
+
+
+def example_6_qdrant_vector_database():
+ """
+ Example 6: Same retriever with Qdrant vector database
+ Use case: Production deployment with persistent vector storage
+
+ This demonstrates that SuperlinkedRetriever is vector-database agnostic:
+ the same retriever code works with Qdrant (or Redis, MongoDB) by changing
+ only the executor configuration, not the retriever implementation.
+ """
+ print("=== Example 6: Qdrant Vector Database ===")
+
+ # 1. Define Schema (IDENTICAL to Example 1)
+ class DocumentSchema(sl.Schema):
+ id: sl.IdField
+ content: sl.String
+
+ doc_schema = DocumentSchema()
+
+ # 2. Define Space and Index (IDENTICAL to Example 1)
+ text_space = sl.TextSimilaritySpace(
+ text=doc_schema.content, model="sentence-transformers/all-MiniLM-L6-v2"
+ )
+
+ doc_index = sl.Index([text_space])
+
+ # 3. Define Query (IDENTICAL to Example 1)
+ query = (
+ sl.Query(doc_index)
+ .find(doc_schema)
+ .similar(text_space.text, sl.Param("query_text"))
+ .select([doc_schema.content])
+ .limit(sl.Param("limit"))
+ )
+
+ # 4. Configure Qdrant Vector Database (ONLY DIFFERENCE!)
+ print("π§ Configuring Qdrant vector database...")
+ try:
+ qdrant_vector_db = sl.QdrantVectorDatabase(
+ url="https://your-qdrant-cluster.qdrant.io", # Replace with your Qdrant URL
+ api_key="your-api-key-here", # Replace with your API key
+ default_query_limit=10,
+ vector_precision=sl.Precision.FLOAT16,
+ )
+ print(
+ "Qdrant configuration created (credentials needed for actual connection)"
+ )
+ except Exception as e:
+ print(f"β οΈ Qdrant not configured (expected without credentials): {e}")
+ print("π Using in-memory fallback for demonstration...")
+ qdrant_vector_db = None
+
+ # 5. Set up data and app (SLIGHT DIFFERENCE - vector database parameter)
+ documents = [
+ {
+ "id": "doc1",
+ "content": "Machine learning algorithms can process large datasets efficiently.",
+ },
+ {
+ "id": "doc2",
+ "content": "Natural language processing enables computers to understand human language.",
+ },
+ {
+ "id": "doc3",
+ "content": "Deep learning models require significant computational resources.",
+ },
+ {
+ "id": "doc4",
+ "content": "Data science combines statistics, programming, and domain expertise.",
+ },
+ {
+ "id": "doc5",
+ "content": "Artificial intelligence is transforming various industries.",
+ },
+ ]
+
+ # Create source and executor with Qdrant (or fallback to in-memory)
+ source = sl.InMemorySource(schema=doc_schema)
+
+ if qdrant_vector_db:
+ # Production setup with Qdrant
+ executor = sl.InMemoryExecutor(
+ sources=[source],
+ indices=[doc_index],
+ vector_database=qdrant_vector_db, # This makes it use Qdrant!
+ )
+ storage_type = "Qdrant (persistent)"
+ else:
+ # Fallback to in-memory for demo
+ executor = sl.InMemoryExecutor(sources=[source], indices=[doc_index])
+ storage_type = "In-Memory (fallback)"
+
+ app = executor.run()
+
+ # Add data to the source after the app is running
+ source.put(documents)
+
+ # 6. Create Retriever (IDENTICAL CODE!)
+ retriever = SuperlinkedRetriever(
+ sl_client=app, sl_query=query, page_content_field="content"
+ )
+
+ # 7. Use the retriever (IDENTICAL CODE!)
+ results = retriever.invoke("artificial intelligence and machine learning", limit=3)
+
+ print(f"π Vector Storage: {storage_type}")
+ print("π Query: 'artificial intelligence and machine learning'")
+ print(f"π Found {len(results)} documents:")
+ for i, doc in enumerate(results, 1):
+ print(f" {i}. {doc.page_content}")
+
+ print(
+ "\nKey insight: the same SuperlinkedRetriever code works with any vector database!"
+ )
+ print(
+ "Only the executor configuration changes; the retriever implementation stays identical"
+ )
+ print("Switch between in-memory → Qdrant → Redis → MongoDB without code changes")
+ print()
+
+
+def main():
+ """
+ Run all examples to demonstrate the flexibility of SuperlinkedRetriever
+ """
+ print("SuperlinkedRetriever Examples")
+ print("=" * 50)
+ print("This file demonstrates how the SuperlinkedRetriever can be used")
+ print("with different space configurations for various use cases.\n")
+
+ try:
+ example_1_simple_text_search()
+ example_2_multi_space_blog_search()
+ example_3_ecommerce_product_search()
+ example_4_news_article_search()
+ demonstrate_langchain_integration()
+ example_6_qdrant_vector_database()
+
+ print("π All examples completed successfully!")
+
+ except Exception as e:
+ print(f"β Error running examples: {e}")
+ print("Make sure you have 'superlinked' package installed:")
+ print("pip install superlinked")
+
+
+if __name__ == "__main__":
+ main()