# Upper bound on how many stored examples are injected into one prompt.
_MAX_FEW_SHOT_EXAMPLES = 5


def _format_chat_example(example: "Example") -> str:
    """Render one stored chatbot example as a few-shot text snippet.

    The first message of ``example.inputs["input"]`` is emitted first. Every
    later input message whose ``type`` is ``"human"`` is emitted as feedback,
    and the content of ``example.outputs["output"]`` is emitted last.

    Args:
        example: Dataset example whose ``inputs["input"]`` is a list of
            message dicts and whose ``outputs["output"]`` is a message dict.

    Returns:
        The formatted snippet.
    """
    messages = example.inputs["input"]
    feedback = "".join(
        "\n" + message["content"] + "\n\n"
        for message in messages[1:]
        if message["type"] == "human"
    )
    return f"""
{messages[0]['content']}

{feedback}
{example.outputs['output']['content']}
"""


def _format_agent_example(example: "Example") -> str:
    """Render one stored agent example as a few-shot text snippet.

    Keeps the first message of ``example.outputs["output"]`` plus the messages
    that follow the last ``"human"`` message (searching from the second
    message onwards), and renders that list with its Python repr.

    Args:
        example: Dataset example whose ``outputs["output"]`` is a list of
            message dicts.

    Returns:
        The formatted snippet.
    """
    messages = example.outputs["output"]
    trailing = []
    # Walk backwards so only the messages after the most recent human turn
    # are collected.
    for message in reversed(messages[1:]):
        if message["type"] == "human":
            break
        trailing.append(message)
    trailing.reverse()
    return f"""
{[messages[0]] + trailing}
"""


def get_few_shot_str(
    assistant_id: str,
    *,
    agent: bool = False,
    client: Optional["LangSmithClient"] = None,
) -> str:
    """Build the few-shot section appended to an assistant's system message.

    Looks up the LangSmith dataset named after ``assistant_id``, samples up to
    ``_MAX_FEW_SHOT_EXAMPLES`` of its examples at random and formats them.

    Args:
        assistant_id: Name of the dataset holding the assistant's examples.
        agent: Format examples as agent traces instead of chatbot exchanges.
        client: LangSmith client to use. A new ``LangSmithClient()`` is
            created when omitted.

    Returns:
        The instruction text followed by the formatted examples, or ``""``
        when the dataset does not exist or contains no examples.
    """
    if client is None:
        client = LangSmithClient()
    # Callers concatenate the result onto the system message, so a missing
    # dataset must yield an empty string rather than falling through.
    if not client.has_dataset(dataset_name=assistant_id):
        return ""
    examples = list(client.list_examples(dataset_name=assistant_id))
    if not examples:
        return ""
    # TODO: Make this not random. Could be latest, could use some similarity
    # measure.
    examples = random.sample(examples, min(len(examples), _MAX_FEW_SHOT_EXAMPLES))
    format_example = _format_agent_example if agent else _format_chat_example
    example_str = "\n".join(format_example(e) for e in examples)
    return f"""Here are some previous interactions with a user trying to accomplish a similar task. \
You should assume that the final output is the desired one, and any \
intermediate steps were wrong in some way, and the human then tried to improve upon \
them in specific ways. Learn from these previous interactions and do not repeat past \
mistakes!

{example_str}
"""
system_message, interrupt_before_action + _tools, + agent, + system_message, + interrupt_before_action, + assistant_id=assistant_id, + self_learning=self_learning, ) agent_executor = _agent.with_config({"recursion_limit": 50}) super().__init__( @@ -180,6 +243,9 @@ class LLMType(str, Enum): def get_chatbot( llm_type: LLMType, system_message: str, + *, + assistant_id: Optional[str] = None, + self_learning: bool = False, ): if llm_type == LLMType.GPT_35_TURBO: llm = get_openai_llm() @@ -197,6 +263,10 @@ def get_chatbot( llm = get_mixtral_fireworks() else: raise ValueError("Unexpected llm type") + + if self_learning and assistant_id: + system_message += "\n\n" + get_few_shot_str(assistant_id) + return get_chatbot_executor(llm, system_message, CHECKPOINTER) @@ -204,19 +274,25 @@ class ConfigurableChatBot(RunnableBinding): llm: LLMType system_message: str = DEFAULT_SYSTEM_MESSAGE user_id: Optional[str] = None + assistant_id: Optional[str] = None + self_learning: bool = False def __init__( self, *, llm: LLMType = LLMType.GPT_35_TURBO, system_message: str = DEFAULT_SYSTEM_MESSAGE, + assistant_id: Optional[str] = None, + self_learning: bool = False, kwargs: Optional[Mapping[str, Any]] = None, config: Optional[Mapping[str, Any]] = None, **others: Any, ) -> None: others.pop("bound", None) - chatbot = get_chatbot(llm, system_message) + chatbot = get_chatbot( + llm, system_message, assistant_id=assistant_id, self_learning=self_learning + ) super().__init__( llm=llm, system_message=system_message, @@ -231,6 +307,14 @@ def __init__( .configurable_fields( llm=ConfigurableField(id="llm_type", name="LLM Type"), system_message=ConfigurableField(id="system_message", name="Instructions"), + assistant_id=ConfigurableField( + id="assistant_id", name="Assistant ID", is_shared=True + ), + self_learning=ConfigurableField( + id="self_learning", + name="Self-learning", + description="A self-learning GPT is one that will learn use user feedback to improve over time.", + ), ) 
def _create_few_shot_dataset_and_rule(
    aid: AssistantID, assistant_type: Literal["agent", "chatbot"]
) -> None:
    """Create the LangSmith dataset and run rule that collect few-shot examples.

    A dataset named after ``aid`` is created, then a rule is posted to
    ``/runs/rules`` that adds runs to that dataset when they carry a
    ``user_score`` feedback of 1 and ``assistant_id`` metadata equal to
    ``aid``. For ``"chatbot"`` assistants the rule matches runs named
    ``"chatbot"`` and applies the feedback filter at trace level.

    Args:
        aid: Assistant ID; used as the dataset name and in the rule filter.
        assistant_type: ``"agent"`` or ``"chatbot"``.

    Raises:
        ValueError: If ``assistant_type`` is not ``"agent"`` or ``"chatbot"``.
        KeyError: If the ``LANGCHAIN_PROJECT`` environment variable is unset.
    """
    # Validate before any network call so an unsupported type cannot leave a
    # dataset behind without a rule.
    if assistant_type not in ("agent", "chatbot"):
        raise ValueError(
            f"Unknown assistant_type {assistant_type}. Expected 'agent' or 'chatbot'."
        )

    client = LangSmithClient()
    # This runs on every upsert of a self-learning assistant, so it has to be
    # idempotent: do not create the dataset and rule a second time.
    # NOTE(review): assumes an existing dataset implies the rule was already
    # posted by an earlier call - confirm.
    if client.has_dataset(dataset_name=aid):
        return

    eq_filters = [
        ("feedback_key", '"user_score"'),
        ("feedback_score", 1),
        ("metadata_key", '"assistant_id"'),
        ("metadata_value", f'"{aid}"'),
    ]
    formatted_eq_filters = ", ".join(f"eq({attr}, {val})" for attr, val in eq_filters)
    user_liked_filter = f"and({formatted_eq_filters})"

    # Resolve the project before creating anything, so a missing environment
    # variable or unknown project fails without side effects.
    session_id = client.read_project(project_name=os.environ["LANGCHAIN_PROJECT"]).id
    dataset = client.create_dataset(aid)

    payload = {
        "display_name": f"few shot {aid}",
        "session_id": str(session_id),
        "sampling_rate": 1,
        "add_to_dataset_id": str(dataset.id),
    }
    if assistant_type == "agent":
        payload["filter"] = user_liked_filter
    else:
        payload["filter"] = 'eq(name, "chatbot")'
        payload["trace_filter"] = user_liked_filter

    client.request_with_retries(
        "POST",
        client.api_url + "/runs/rules",
        {"json": payload, "headers": client._headers},
    )
app.parsing import MIMETYPE_BASED_PARSER diff --git a/backend/tests/unit_tests/app/test_app.py b/backend/tests/unit_tests/app/test_app.py index f2bfdc6cf..ff840bc1b 100644 --- a/backend/tests/unit_tests/app/test_app.py +++ b/backend/tests/unit_tests/app/test_app.py @@ -44,13 +44,17 @@ async def test_list_and_create_assistants(pool: asyncpg.pool.Pool) -> None: # Create an assistant response = await client.put( f"/assistants/{aid}", - json={"name": "bobby", "config": {}, "public": False}, + json={ + "name": "bobby", + "config": {"configurable": {"type": "agent", "self_learning": False}}, + "public": False, + }, headers=headers, ) assert response.status_code == 200 assert _project(response.json(), exclude_keys=["updated_at"]) == { "assistant_id": aid, - "config": {}, + "config": {"configurable": {"type": "agent", "self_learning": False}}, "name": "bobby", "public": False, "user_id": "1", @@ -62,7 +66,7 @@ async def test_list_and_create_assistants(pool: asyncpg.pool.Pool) -> None: assert [_project(d, exclude_keys=["updated_at"]) for d in response.json()] == [ { "assistant_id": aid, - "config": {}, + "config": {"configurable": {"type": "agent", "self_learning": False}}, "name": "bobby", "public": False, "user_id": "1", @@ -71,13 +75,22 @@ async def test_list_and_create_assistants(pool: asyncpg.pool.Pool) -> None: response = await client.put( f"/assistants/{aid}", - json={"name": "bobby", "config": {}, "public": False}, + json={ + "name": "bobby", + "config": { + "configurable": { + "type": "chatbot", + "self_learning": False, + } + }, + "public": False, + }, headers=headers, ) assert _project(response.json(), exclude_keys=["updated_at"]) == { "assistant_id": aid, - "config": {}, + "config": {"configurable": {"type": "chatbot", "self_learning": False}}, "name": "bobby", "public": False, "user_id": "1", @@ -99,7 +112,11 @@ async def test_threads() -> None: async with get_client() as client: response = await client.put( f"/assistants/{aid}", - json={"name": "assistant", 
"config": {}, "public": False}, + json={ + "name": "assistant", + "config": {"configurable": {"type": "agent", "self_learning": False}}, + "public": False, + }, headers=headers, ) diff --git a/frontend/index.html b/frontend/index.html index 304437ae1..6b15059a4 100644 --- a/frontend/index.html +++ b/frontend/index.html @@ -3,7 +3,7 @@ - OpenGPTs + Self Learning GPTs diff --git a/frontend/src/components/Config.tsx b/frontend/src/components/Config.tsx index 3220bea77..52ca00262 100644 --- a/frontend/src/components/Config.tsx +++ b/frontend/src/components/Config.tsx @@ -29,7 +29,9 @@ function Types(props: { alwaysExpanded?: boolean; }) { const options = - props.field.enum?.map((id) => TYPES[id as keyof typeof TYPES]) ?? []; + props.field.enum + ?.map((id) => TYPES[id as keyof typeof TYPES]) + .filter(Boolean) ?? []; return (
@@ -472,6 +474,7 @@ const ORDER = [ "tools", "llm_type", "agent_type", + "self_learning", ]; export function Config(props: { diff --git a/frontend/src/components/Layout.tsx b/frontend/src/components/Layout.tsx index 551527a13..3d0b5c776 100644 --- a/frontend/src/components/Layout.tsx +++ b/frontend/src/components/Layout.tsx @@ -101,10 +101,11 @@ export function Layout(props: {
{props.subtitle ? ( <> - OpenGPTs: {props.subtitle} + Self Learning GPTs:{" "} + {props.subtitle} ) : ( - "OpenGPTs" + "Self Learning GPTs" )}