diff --git a/README.md b/README.md
index 207be11fd..a54440073 100644
--- a/README.md
+++ b/README.md
@@ -76,6 +76,9 @@ This project supports a Docker-based setup, streamlining installation and execut
```
This command builds the Docker images for the frontend and backend from their respective Dockerfiles and starts all necessary services, including Postgres.
+
+   **NOTE**: Make sure your Docker Compose version is v2.24.6 or newer. If you're using Docker Desktop, upgrading Docker Desktop will also upgrade Compose. See the docs for more on [installing Docker Compose](https://docs.docker.com/compose/install/).
+
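+   You can confirm the installed version with:
+
+   ```sh
+   docker compose version
+   ```
+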
5. **Access the Application:**
With the services running, access the frontend at [http://localhost:5173](http://localhost:5173), substituting `5173` with the designated port number.
diff --git a/backend/app/agent.py b/backend/app/agent.py
index 5092da5c0..cd46fbb9b 100644
--- a/backend/app/agent.py
+++ b/backend/app/agent.py
@@ -1,3 +1,4 @@
+import random
from enum import Enum
from typing import Any, Mapping, Optional, Sequence, Union
@@ -7,6 +8,8 @@
RunnableBinding,
)
from langgraph.checkpoint import CheckpointAt
+from langsmith import Client as LangSmithClient
+from langsmith.schemas import Example
from app.agent_types.google_agent import get_google_agent_executor
from app.agent_types.openai_agent import get_openai_agent_executor
@@ -70,12 +73,65 @@ class AgentType(str, Enum):
CHECKPOINTER = PostgresCheckpoint(at=CheckpointAt.END_OF_STEP)
+def _format_chat_example(example: Example) -> str:
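+    """Render an example as the original request followed by the human
+    feedback messages that came after it."""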
+ feedback = ""
+ for i in example.inputs["input"][1:]:
+ if i["type"] == "human":
+ feedback += "\n" + i["content"] + "\n\n"
+ return f"""
+{example.inputs['input'][0]['content']}
+
+{feedback}"""
+
+
+def _format_agent_example(example: Example) -> str:
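+    """Render an example as the first output message plus everything after
+    the last human turn, i.e. the accepted final trajectory."""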
+ new_messages = []
+ for o in example.outputs["output"][1:][::-1]:
+ if o["type"] == "human":
+ break
+ new_messages.append(o)
+ return f"""
+{[example.outputs['output'][0]] + new_messages[::-1]}
+"""
+
+
+def get_few_shot_str(assistant_id: str, *, agent: bool = False) -> str:
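+    """Build a few-shot prompt section from up to 5 examples in the
+    assistant's LangSmith dataset, or "" if there are none."""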
+ client = LangSmithClient()
+ if client.has_dataset(dataset_name=assistant_id):
+ examples = list(client.list_examples(dataset_name=assistant_id))
+ if not examples:
+ return ""
+ # TODO: Make this not random. Could be latest, could use some similarity
+ # measure.
+ examples = random.sample(examples, min(len(examples), 5))
+ if agent:
+ example_str = "\n".join([_format_agent_example(e) for e in examples])
+ else:
+ example_str = "\n".join([_format_chat_example(e) for e in examples])
+        return f"""Here are some previous interactions with a user trying to accomplish a similar task. \
+Assume the final output is the desired one and that any intermediate steps were \
+wrong in some way; the human then tried to improve on them. Learn from these \
+previous interactions and do not repeat past mistakes!
+
+{example_str}
+"""
+
+
def get_agent_executor(
tools: list,
agent: AgentType,
system_message: str,
interrupt_before_action: bool,
+ *,
+ assistant_id: Optional[str] = None,
+ self_learning: bool = False,
):
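+    # Self-learning agents get few-shot examples, built from past user
+    # feedback, appended to their system message.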
+ if self_learning and assistant_id is not None:
+ system_message += "\n\n" + get_few_shot_str(assistant_id, agent=True)
+
if agent == AgentType.GPT_35_TURBO:
llm = get_openai_llm()
return get_openai_agent_executor(
@@ -119,6 +175,7 @@ class ConfigurableAgent(RunnableBinding):
assistant_id: Optional[str] = None
thread_id: Optional[str] = None
user_id: Optional[str] = None
+ self_learning: bool = False
def __init__(
self,
@@ -130,6 +187,7 @@ def __init__(
thread_id: Optional[str] = None,
retrieval_description: str = RETRIEVAL_DESCRIPTION,
interrupt_before_action: bool = False,
+ self_learning: bool = False,
kwargs: Optional[Mapping[str, Any]] = None,
config: Optional[Mapping[str, Any]] = None,
**others: Any,
@@ -153,7 +211,12 @@ def __init__(
else:
_tools.append(_returned_tools)
_agent = get_agent_executor(
- _tools, agent, system_message, interrupt_before_action
+ _tools,
+ agent,
+ system_message,
+ interrupt_before_action,
+ assistant_id=assistant_id,
+ self_learning=self_learning,
)
agent_executor = _agent.with_config({"recursion_limit": 50})
super().__init__(
@@ -180,6 +243,9 @@ class LLMType(str, Enum):
def get_chatbot(
llm_type: LLMType,
system_message: str,
+ *,
+ assistant_id: Optional[str] = None,
+ self_learning: bool = False,
):
if llm_type == LLMType.GPT_35_TURBO:
llm = get_openai_llm()
@@ -197,6 +263,10 @@ def get_chatbot(
llm = get_mixtral_fireworks()
else:
raise ValueError("Unexpected llm type")
+
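+    # As with agents, self-learning chatbots get few-shot examples from past
+    # user feedback appended to the system message.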
+ if self_learning and assistant_id:
+ system_message += "\n\n" + get_few_shot_str(assistant_id)
+
return get_chatbot_executor(llm, system_message, CHECKPOINTER)
@@ -204,19 +274,25 @@ class ConfigurableChatBot(RunnableBinding):
llm: LLMType
system_message: str = DEFAULT_SYSTEM_MESSAGE
user_id: Optional[str] = None
+ assistant_id: Optional[str] = None
+ self_learning: bool = False
def __init__(
self,
*,
llm: LLMType = LLMType.GPT_35_TURBO,
system_message: str = DEFAULT_SYSTEM_MESSAGE,
+ assistant_id: Optional[str] = None,
+ self_learning: bool = False,
kwargs: Optional[Mapping[str, Any]] = None,
config: Optional[Mapping[str, Any]] = None,
**others: Any,
) -> None:
others.pop("bound", None)
- chatbot = get_chatbot(llm, system_message)
+ chatbot = get_chatbot(
+ llm, system_message, assistant_id=assistant_id, self_learning=self_learning
+ )
super().__init__(
llm=llm,
system_message=system_message,
@@ -231,6 +307,14 @@ def __init__(
.configurable_fields(
llm=ConfigurableField(id="llm_type", name="LLM Type"),
system_message=ConfigurableField(id="system_message", name="Instructions"),
+ assistant_id=ConfigurableField(
+ id="assistant_id", name="Assistant ID", is_shared=True
+ ),
+ self_learning=ConfigurableField(
+ id="self_learning",
+ name="Self-learning",
+            description="A self-learning GPT is one that will learn from user feedback to improve over time.",
+ ),
)
.with_types(input_type=Sequence[AnyMessage], output_type=Sequence[AnyMessage])
)
@@ -291,12 +375,14 @@ def __init__(
id="assistant_id", name="Assistant ID", is_shared=True
),
thread_id=ConfigurableField(id="thread_id", name="Thread ID", is_shared=True),
+ # TODO: Add support
+ # self_learning=ConfigurableField(id="self_learning", name="Self-learning")
)
.with_types(input_type=Sequence[AnyMessage], output_type=Sequence[AnyMessage])
)
-agent = (
+agent_w_tools = (
ConfigurableAgent(
agent=AgentType.GPT_35_TURBO,
tools=[],
@@ -321,17 +407,24 @@ def __init__(
retrieval_description=ConfigurableField(
id="retrieval_description", name="Retrieval Description"
),
- )
- .configurable_alternatives(
- ConfigurableField(id="type", name="Bot Type"),
- default_key="agent",
- prefix_keys=True,
- chatbot=chatbot,
- chat_retrieval=chat_retrieval,
+ self_learning=ConfigurableField(
+ id="self_learning",
+ name="Self-learning",
+            description="A self-learning GPT is one that will learn from user feedback to improve over time.",
+ ),
)
.with_types(input_type=Sequence[AnyMessage], output_type=Sequence[AnyMessage])
)
+
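+# Keep the tool-using agent as the default "Bot Type", with the chatbot and
+# chat retrieval executors as configurable alternatives.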
+agent = agent_w_tools.configurable_alternatives(
+ ConfigurableField(id="type", name="Bot Type"),
+ default_key="agent",
+ prefix_keys=True,
+ chatbot=chatbot,
+ chat_retrieval=chat_retrieval,
+).with_types(input_type=Sequence[AnyMessage], output_type=Sequence[AnyMessage])
+
if __name__ == "__main__":
import asyncio
diff --git a/backend/app/api/assistants.py b/backend/app/api/assistants.py
index 1667c5f44..b43465837 100644
--- a/backend/app/api/assistants.py
+++ b/backend/app/api/assistants.py
@@ -1,7 +1,9 @@
-from typing import Annotated, List, Optional
+import os
+from typing import Annotated, List, Literal, Optional
from uuid import uuid4
from fastapi import APIRouter, HTTPException, Path, Query
+from langsmith import Client as LangSmithClient
from pydantic import BaseModel, Field
import app.storage as storage
@@ -68,6 +70,42 @@ async def create_assistant(
)
+def _create_few_shot_dataset_and_rule(
+ aid: AssistantID, assistant_type: Literal["agent", "chatbot"]
+) -> None:
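+    """Create a LangSmith dataset for this assistant, plus a run rule that
+    samples user-liked runs into it."""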
+ client = LangSmithClient()
+ dataset = client.create_dataset(aid)
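+    # Match runs the user scored positively ("user_score" == 1) that are
+    # tagged with this assistant's id.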
+ eq_filters = [
+ ("feedback_key", '"user_score"'),
+ ("feedback_score", 1),
+ ("metadata_key", '"assistant_id"'),
+ ("metadata_value", f'"{aid}"'),
+ ]
+ formatted_eq_filters = ", ".join(f"eq({attr}, {val})" for attr, val in eq_filters)
+ user_liked_filter = f"and({formatted_eq_filters})"
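+    # Rules are attached to a tracing project, referenced by its session id.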
+ session_id = client.read_project(project_name=os.environ["LANGCHAIN_PROJECT"]).id
+ payload = {
+ "display_name": f"few shot {aid}",
+ "session_id": str(session_id),
+ "sampling_rate": 1,
+ "add_to_dataset_id": str(dataset.id),
+ }
+ if assistant_type == "agent":
+ payload["filter"] = user_liked_filter
+ elif assistant_type == "chatbot":
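+        # Chatbot runs are traced under the name "chatbot"; sample that run
+        # from traces the user liked.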
+ payload["filter"] = 'eq(name, "chatbot")'
+ payload["trace_filter"] = user_liked_filter
+ else:
+ raise ValueError(
+ f"Unknown assistant_type {assistant_type}. Expected 'agent' or 'chatbot'."
+ )
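+    # POST the hand-built rule payload straight to the runs/rules endpoint.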
+ client.request_with_retries(
+ "POST",
+ client.api_url + "/runs/rules",
+ {"json": payload, "headers": client._headers},
+ )
+
+
@router.put("/{aid}")
async def upsert_assistant(
opengpts_user_id: OpengptsUserId,
@@ -75,6 +113,9 @@ async def upsert_assistant(
payload: AssistantPayload,
) -> Assistant:
"""Create or update an assistant."""
+    configurable = payload.config["configurable"]
+    assistant_type = configurable["type"]
+    if configurable.get(f"type=={assistant_type}/self_learning"):
+        _create_few_shot_dataset_and_rule(aid, assistant_type)
return await storage.put_assistant(
opengpts_user_id,
aid,
diff --git a/backend/app/checkpoint.py b/backend/app/checkpoint.py
index 88df94330..9e4681b48 100644
--- a/backend/app/checkpoint.py
+++ b/backend/app/checkpoint.py
@@ -1,10 +1,10 @@
-from datetime import datetime
import pickle
+from datetime import datetime
from typing import AsyncIterator, Optional
from langchain_core.runnables import ConfigurableFieldSpec, RunnableConfig
from langgraph.checkpoint import BaseCheckpointSaver
-from langgraph.checkpoint.base import Checkpoint, CheckpointTuple, CheckpointThreadTs
+from langgraph.checkpoint.base import Checkpoint, CheckpointThreadTs, CheckpointTuple
from app.lifespan import get_pg_pool
diff --git a/backend/app/upload.py b/backend/app/upload.py
index e4adc8148..3d8c32435 100644
--- a/backend/app/upload.py
+++ b/backend/app/upload.py
@@ -11,7 +11,6 @@
import os
from typing import Any, BinaryIO, List, Optional
-from langchain_text_splitters import RecursiveCharacterTextSplitter, TextSplitter
from langchain_community.document_loaders.blob_loaders.schema import Blob
from langchain_community.vectorstores.pgvector import PGVector
from langchain_core.runnables import (
@@ -21,6 +20,7 @@
)
from langchain_core.vectorstores import VectorStore
from langchain_openai import OpenAIEmbeddings
+from langchain_text_splitters import RecursiveCharacterTextSplitter, TextSplitter
from app.ingest import ingest_blob
from app.parsing import MIMETYPE_BASED_PARSER
diff --git a/backend/tests/unit_tests/app/test_app.py b/backend/tests/unit_tests/app/test_app.py
index f2bfdc6cf..ff840bc1b 100644
--- a/backend/tests/unit_tests/app/test_app.py
+++ b/backend/tests/unit_tests/app/test_app.py
@@ -44,13 +44,17 @@ async def test_list_and_create_assistants(pool: asyncpg.pool.Pool) -> None:
# Create an assistant
response = await client.put(
f"/assistants/{aid}",
- json={"name": "bobby", "config": {}, "public": False},
+ json={
+ "name": "bobby",
+ "config": {"configurable": {"type": "agent", "self_learning": False}},
+ "public": False,
+ },
headers=headers,
)
assert response.status_code == 200
assert _project(response.json(), exclude_keys=["updated_at"]) == {
"assistant_id": aid,
- "config": {},
+ "config": {"configurable": {"type": "agent", "self_learning": False}},
"name": "bobby",
"public": False,
"user_id": "1",
@@ -62,7 +66,7 @@ async def test_list_and_create_assistants(pool: asyncpg.pool.Pool) -> None:
assert [_project(d, exclude_keys=["updated_at"]) for d in response.json()] == [
{
"assistant_id": aid,
- "config": {},
+ "config": {"configurable": {"type": "agent", "self_learning": False}},
"name": "bobby",
"public": False,
"user_id": "1",
@@ -71,13 +75,22 @@ async def test_list_and_create_assistants(pool: asyncpg.pool.Pool) -> None:
response = await client.put(
f"/assistants/{aid}",
- json={"name": "bobby", "config": {}, "public": False},
+ json={
+ "name": "bobby",
+ "config": {
+ "configurable": {
+ "type": "chatbot",
+ "self_learning": False,
+ }
+ },
+ "public": False,
+ },
headers=headers,
)
assert _project(response.json(), exclude_keys=["updated_at"]) == {
"assistant_id": aid,
- "config": {},
+ "config": {"configurable": {"type": "chatbot", "self_learning": False}},
"name": "bobby",
"public": False,
"user_id": "1",
@@ -99,7 +112,11 @@ async def test_threads() -> None:
async with get_client() as client:
response = await client.put(
f"/assistants/{aid}",
- json={"name": "assistant", "config": {}, "public": False},
+ json={
+ "name": "assistant",
+ "config": {"configurable": {"type": "agent", "self_learning": False}},
+ "public": False,
+ },
headers=headers,
)
diff --git a/frontend/index.html b/frontend/index.html
index 304437ae1..6b15059a4 100644
--- a/frontend/index.html
+++ b/frontend/index.html
@@ -3,7 +3,7 @@
-    <title>OpenGPTs</title>
+    <title>Self Learning GPTs</title>
diff --git a/frontend/src/components/Config.tsx b/frontend/src/components/Config.tsx
index 3220bea77..52ca00262 100644
--- a/frontend/src/components/Config.tsx
+++ b/frontend/src/components/Config.tsx
@@ -29,7 +29,9 @@ function Types(props: {
alwaysExpanded?: boolean;
}) {
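+  // Drop enum ids that have no corresponding entry in TYPES.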
const options =
- props.field.enum?.map((id) => TYPES[id as keyof typeof TYPES]) ?? [];
+ props.field.enum
+ ?.map((id) => TYPES[id as keyof typeof TYPES])
+ .filter(Boolean) ?? [];
return (
@@ -472,6 +474,7 @@ const ORDER = [
"tools",
"llm_type",
"agent_type",
+ "self_learning",
];
export function Config(props: {
diff --git a/frontend/src/components/Layout.tsx b/frontend/src/components/Layout.tsx
index 551527a13..3d0b5c776 100644
--- a/frontend/src/components/Layout.tsx
+++ b/frontend/src/components/Layout.tsx
@@ -101,10 +101,11 @@ export function Layout(props: {
{props.subtitle ? (
<>
-          OpenGPTs: <span className="font-normal">{props.subtitle}</span>
+          Self Learning GPTs:{" "}
+          <span className="font-normal">{props.subtitle}</span>
          </>
) : (
- "OpenGPTs"
+ "Self Learning GPTs"
)}