From 360b66c672aed6b9bd36565498a43711d511e923 Mon Sep 17 00:00:00 2001 From: Andrew Brookins Date: Fri, 13 Jun 2025 12:45:29 -0700 Subject: [PATCH 01/12] Make a separate codebase for the client --- .github/workflows/agent-memory-client.yml | 136 ++++ CLAUDE.md | 9 +- agent-memory-client/README.md | 318 ++++++++ .../agent_memory_client/__init__.py | 34 + .../agent_memory_client/client.py | 660 ++++++++--------- .../agent_memory_client/exceptions.py | 34 + .../agent_memory_client/filters.py | 91 +++ .../agent_memory_client/models.py | 226 ++++++ agent-memory-client/pyproject.toml | 99 +++ agent-memory-client/tests/test_basic.py | 98 +++ agent-memory-client/tests/test_client.py | 684 ++++++++++++++++++ agent-memory-client/uv.lock | 473 ++++++++++++ agent_memory_server/__init__.py | 2 +- agent_memory_server/api.py | 4 +- agent_memory_server/client/__init__.py | 0 agent_memory_server/long_term_memory.py | 10 +- agent_memory_server/models.py | 4 +- pyproject.toml | 12 +- tests/conftest.py | 4 +- tests/test_client_api.py | 58 +- tests/test_client_enhancements.py | 2 +- tests/test_long_term_memory.py | 6 +- uv.lock | 33 +- 23 files changed, 2637 insertions(+), 360 deletions(-) create mode 100644 .github/workflows/agent-memory-client.yml create mode 100644 agent-memory-client/README.md create mode 100644 agent-memory-client/agent_memory_client/__init__.py rename agent_memory_server/client/api.py => agent-memory-client/agent_memory_client/client.py (71%) create mode 100644 agent-memory-client/agent_memory_client/exceptions.py create mode 100644 agent-memory-client/agent_memory_client/filters.py create mode 100644 agent-memory-client/agent_memory_client/models.py create mode 100644 agent-memory-client/pyproject.toml create mode 100644 agent-memory-client/tests/test_basic.py create mode 100644 agent-memory-client/tests/test_client.py create mode 100644 agent-memory-client/uv.lock delete mode 100644 agent_memory_server/client/__init__.py diff --git a/.github/workflows/agent-memory-client.yml b/.github/workflows/agent-memory-client.yml new file mode 100644 index 0000000..8580bb1 --- /dev/null +++ b/.github/workflows/agent-memory-client.yml @@ -0,0 +1,136 @@ +name: Agent Memory Client CI + +on: + push: + branches: [main] + tags: + - 'client/v*.*.*' + pull_request: + branches: [main] + +jobs: + test: + name: Test (Python ${{ matrix.python-version }}) + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ["3.10", "3.11", "3.12"] + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + + - name: Install uv + uses: astral-sh/setup-uv@v3 + + - name: Install dependencies + run: | + cd agent-memory-client + uv sync --extra dev + + - name: Lint with Ruff + run: | + cd agent-memory-client + uv run ruff check agent_memory_client + + - name: Check formatting with Ruff formatter + run: | + cd agent-memory-client + uv run ruff format --check agent_memory_client + + - name: Type check with mypy + run: | + cd agent-memory-client + uv run mypy agent_memory_client + + - name: Run tests + run: | + cd agent-memory-client + uv run pytest tests/ --cov=agent_memory_client --cov-report=xml + + publish-testpypi: + name: Publish to TestPyPI + needs: test + if: startsWith(github.ref, 'refs/tags/client/') && contains(github.ref, '-') + runs-on: ubuntu-latest + environment: testpypi + permissions: + id-token: write + contents: read + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: 
actions/setup-python@v4 + with: + python-version: '3.12' + + - name: Install build tools + run: | + python -m pip install --upgrade pip + pip install build + + - name: Build package + working-directory: agent-memory-client + run: python -m build + + - name: Publish package to TestPyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + repository-url: https://test.pypi.org/legacy/ + packages-dir: agent-memory-client/dist/ + + publish-pypi: + name: Publish to PyPI + needs: test + if: startsWith(github.ref, 'refs/tags/client/') && !contains(github.ref, '-') + runs-on: ubuntu-latest + environment: pypi + permissions: + id-token: write + contents: read + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.12' + + - name: Install build tools + run: | + python -m pip install --upgrade pip + pip install build + + - name: Build package + working-directory: agent-memory-client + run: python -m build + + - name: Publish package to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + packages-dir: agent-memory-client/dist/ + +# Alternative: API Token Authentication (if trusted publishing doesn't work) +# Uncomment the sections below and add these secrets to your repository: +# - TEST_PYPI_API_TOKEN (for TestPyPI) +# - PYPI_API_TOKEN (for PyPI) +# +# For TestPyPI job, replace the publish step with: +# - name: Publish package to TestPyPI +# uses: pypa/gh-action-pypi-publish@release/v1 +# with: +# repository-url: https://test.pypi.org/legacy/ +# packages-dir: agent-memory-client/dist/ +# password: ${{ secrets.TEST_PYPI_API_TOKEN }} +# +# For PyPI job, replace the publish step with: +# - name: Publish package to PyPI +# uses: pypa/gh-action-pypi-publish@release/v1 +# with: +# packages-dir: agent-memory-client/dist/ +# password: ${{ secrets.PYPI_API_TOKEN }} diff --git a/CLAUDE.md b/CLAUDE.md index bba469c..200b038 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -8,11 +8,14 @@ pip install uv ```bash # Development workflow -uv install # Install dependencies +uv venv # Create a virtualenv (once) +source .venv/bin/activate # Activate the virtualenv (start of terminal session) +uv install --all-extras # Install dependencies +uv sync --all-extras # Sync latest dependencies uv run ruff check # Run linting uv run ruff format # Format code -uv run pytest # Run tests -uv run pytest tests/ # Run specific test directory +uv run pytest # Run tests +uv run pytest tests/ # Run specific test directory # Server commands uv run agent-memory api # Start REST API server (default port 8000) diff --git a/agent-memory-client/README.md b/agent-memory-client/README.md new file mode 100644 index 0000000..6ac276e --- /dev/null +++ b/agent-memory-client/README.md @@ -0,0 +1,318 @@ +# Agent Memory Client + +A Python client library for the [Agent Memory Server](https://github.com/your-org/agent-memory-server) REST API, providing comprehensive memory management capabilities for AI agents and applications. 
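The examples in this README assume an Agent Memory Server instance reachable at `http://localhost:8000` (for example, started from the main repository with `uv run agent-memory api`). A minimal connectivity check, sketched here using only the client's own `create_memory_client` and `health_check` helpers, looks like this:

```python
import asyncio

from agent_memory_client import create_memory_client


async def check_connection() -> None:
    # create_memory_client() performs a health check on startup and raises
    # MemoryClientError if the server at base_url cannot be reached.
    client = await create_memory_client("http://localhost:8000")
    try:
        health = await client.health_check()
        print(f"Server is up (timestamp: {health.now})")
    finally:
        await client.close()


asyncio.run(check_connection())
```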
+ +## Features + +- **Complete API Coverage**: Full support for all Agent Memory Server endpoints +- **Memory Lifecycle Management**: Explicit control over working → long-term memory promotion +- **Batch Operations**: Efficient bulk operations with built-in rate limiting +- **Auto-Pagination**: Seamless iteration over large result sets +- **Client-Side Validation**: Pre-flight validation to catch errors early +- **Enhanced Convenience Methods**: Simplified APIs for common operations +- **Type Safety**: Full type hints for better development experience +- **Async-First**: Built for modern async Python applications + +## Installation + +```bash +pip install agent-memory-client +``` + +## Quick Start + +```python +import asyncio +from agent_memory_client import create_memory_client, ClientMemoryRecord, MemoryTypeEnum + +async def main(): + # Create a client instance + client = await create_memory_client( + base_url="http://localhost:8000", + default_namespace="my-app" + ) + + try: + # Create some memories + memories = [ + ClientMemoryRecord( + text="User prefers dark mode", + memory_type=MemoryTypeEnum.SEMANTIC, + topics=["preferences", "ui"] + ), + ClientMemoryRecord( + text="User completed onboarding on 2024-01-15", + memory_type=MemoryTypeEnum.EPISODIC, + topics=["onboarding", "milestones"] + ) + ] + + # Store in long-term memory + await client.create_long_term_memory(memories) + + # Search memories + results = await client.search_long_term_memory( + text="user interface preferences", + limit=10 + ) + + print(f"Found {len(results.memories)} relevant memories") + for memory in results.memories: + print(f"- {memory.text} (distance: {memory.dist})") + + finally: + await client.close() + +# Run the example +asyncio.run(main()) +``` + +## Core API + +### Client Setup + +```python +from agent_memory_client import MemoryAPIClient, MemoryClientConfig + +# Manual configuration +config = MemoryClientConfig( + base_url="http://localhost:8000", + timeout=30.0, + default_namespace="my-app" +) +client = MemoryAPIClient(config) + +# Or use the helper function +client = await create_memory_client( + base_url="http://localhost:8000", + default_namespace="my-app" +) +``` + +### Working Memory Operations + +```python +from agent_memory_client import WorkingMemory, MemoryMessage + +# Create working memory with messages +working_memory = WorkingMemory( + session_id="user-session-123", + messages=[ + MemoryMessage(role="user", content="Hello!"), + MemoryMessage(role="assistant", content="Hi there! 
How can I help?") + ], + namespace="chat-app" +) + +# Store working memory +response = await client.put_session_memory("user-session-123", working_memory) + +# Retrieve working memory +memory = await client.get_session_memory("user-session-123") + +# Convenience method for data storage +await client.set_working_memory_data( + session_id="user-session-123", + data={"user_preferences": {"theme": "dark", "language": "en"}} +) +``` + +### Long-Term Memory Operations + +```python +from agent_memory_client import ClientMemoryRecord, MemoryTypeEnum + +# Create memories +memories = [ + ClientMemoryRecord( + text="User enjoys science fiction books", + memory_type=MemoryTypeEnum.SEMANTIC, + topics=["books", "preferences"], + user_id="user-123" + ) +] + +# Store memories +await client.create_long_term_memory(memories) + +# Search with filters +from agent_memory_client.filters import Topics, UserId + +results = await client.search_long_term_memory( + text="science fiction", + topics=Topics(any=["books", "entertainment"]), + user_id=UserId(eq="user-123"), + limit=20 +) +``` + +## Enhanced Features + +### Memory Lifecycle Management + +```python +# Explicitly promote working memories to long-term storage +await client.promote_working_memories_to_long_term( + session_id="user-session-123", + memory_ids=["memory-1", "memory-2"] # Optional: specific memories +) +``` + +### Batch Operations + +```python +# Bulk create with rate limiting +memory_batches = [batch1, batch2, batch3] +results = await client.bulk_create_long_term_memories( + memory_batches=memory_batches, + batch_size=50, + delay_between_batches=0.1 +) +``` + +### Auto-Pagination + +```python +# Iterate through all results automatically +async for memory in client.search_all_long_term_memories( + text="user preferences", + batch_size=100 +): + print(f"Memory: {memory.text}") +``` + +### Client-Side Validation + +```python +from agent_memory_client.exceptions import MemoryValidationError + +try: + # Validate before sending + client.validate_memory_record(memory) + client.validate_search_filters(limit=10, offset=0) +except MemoryValidationError as e: + print(f"Validation error: {e}") +``` + +### Enhanced Convenience Methods + +```python +# Update working memory data with merge strategies +await client.update_working_memory_data( + session_id="user-session-123", + data_updates={"new_setting": "value"}, + merge_strategy="deep_merge" # "replace", "merge", or "deep_merge" +) + +# Append messages efficiently +new_messages = [ + MemoryMessage(role="user", content="What's the weather?"), + MemoryMessage(role="assistant", content="It's sunny today!") +] + +await client.append_messages_to_working_memory( + session_id="user-session-123", + messages=new_messages +) +``` + +## Advanced Filtering + +```python +from agent_memory_client.filters import ( + SessionId, Namespace, Topics, Entities, + CreatedAt, LastAccessed, UserId, MemoryType +) +from datetime import datetime, timezone + +# Complex search with multiple filters +results = await client.search_long_term_memory( + text="machine learning", + session_id=SessionId(in_=["session-1", "session-2"]), + namespace=Namespace(eq="ai-research"), + topics=Topics(any=["ml", "ai"], none=["deprecated"]), + entities=Entities(all=["tensorflow", "python"]), + created_at=CreatedAt(gte=datetime(2024, 1, 1, tzinfo=timezone.utc)), + user_id=UserId(eq="researcher-123"), + memory_type=MemoryType(eq="semantic"), + distance_threshold=0.8, + limit=50 +) +``` + +## Error Handling + +```python +from agent_memory_client.exceptions 
import ( + MemoryClientError, + MemoryValidationError, + MemoryNotFoundError, + MemoryServerError +) + +try: + memory = await client.get_session_memory("nonexistent-session") +except MemoryNotFoundError: + print("Session not found") +except MemoryServerError as e: + print(f"Server error {e.status_code}: {e}") +except MemoryClientError as e: + print(f"Client error: {e}") +``` + +## Context Manager Usage + +```python +async with create_memory_client("http://localhost:8000") as client: + # Client will be automatically closed when exiting the context + results = await client.search_long_term_memory("search query") +``` + +## Development + +### Running Tests + +```bash +# Install development dependencies +pip install -e ".[dev]" + +# Run tests +pytest + +# Run tests with coverage +pytest --cov=agent_memory_client +``` + +### Code Quality + +```bash +# Format code +black agent_memory_client/ + +# Lint code +ruff check agent_memory_client/ + +# Type checking +mypy agent_memory_client/ +``` + +## Requirements + +- Python 3.10+ +- httpx >= 0.25.0 +- pydantic >= 2.0.0 +- ulid-py >= 1.1.0 + +## License + +MIT License - see [LICENSE](LICENSE) file for details. + +## Contributing + +Contributions are welcome! Please see the [main repository](https://github.com/your-org/agent-memory-server) for contribution guidelines. + +## Links + +- [Agent Memory Server](https://github.com/your-org/agent-memory-server) - The server this client connects to +- [Documentation](https://agent-memory-client.readthedocs.io) - Full API documentation +- [Issues](https://github.com/your-org/agent-memory-client/issues) - Bug reports and feature requests diff --git a/agent-memory-client/agent_memory_client/__init__.py b/agent-memory-client/agent_memory_client/__init__.py new file mode 100644 index 0000000..a47da80 --- /dev/null +++ b/agent-memory-client/agent_memory_client/__init__.py @@ -0,0 +1,34 @@ +""" +Agent Memory Client + +A Python client for the Agent Memory Server REST API providing comprehensive +memory management capabilities for AI agents and applications. +""" + +__version__ = "0.9.0" + +from .client import MemoryAPIClient, MemoryClientConfig, create_memory_client +from .exceptions import ( + MemoryClientError, + MemoryNotFoundError, + MemoryServerError, + MemoryValidationError, +) +from .models import ( + # Re-export essential models for convenience + ModelNameLiteral, +) + +__all__ = [ + # Client classes + "MemoryAPIClient", + "MemoryClientConfig", + "create_memory_client", + # Exceptions + "MemoryClientError", + "MemoryValidationError", + "MemoryNotFoundError", + "MemoryServerError", + # Types + "ModelNameLiteral", +] diff --git a/agent_memory_server/client/api.py b/agent-memory-client/agent_memory_client/client.py similarity index 71% rename from agent_memory_server/client/api.py rename to agent-memory-client/agent_memory_client/client.py index 991f129..9c35646 100644 --- a/agent_memory_server/client/api.py +++ b/agent-memory-client/agent_memory_client/client.py @@ -1,20 +1,22 @@ """ -Redis Memory Server API Client +Agent Memory Server API Client -This module provides a client for the REST API of the Redis Memory Server. +This module provides a standalone client for the REST API of the Agent Memory Server. 
""" import asyncio import contextlib +import re from collections.abc import AsyncIterator from datetime import datetime from typing import Any, Literal import httpx +import ulid from pydantic import BaseModel -from ulid import ULID -from agent_memory_server.filters import ( +from .exceptions import MemoryClientError, MemoryServerError, MemoryValidationError +from .filters import ( CreatedAt, Entities, LastAccessed, @@ -24,51 +26,19 @@ Topics, UserId, ) -from agent_memory_server.models import ( +from .models import ( AckResponse, ClientMemoryRecord, - CreateMemoryRecordRequest, HealthCheckResponse, - MemoryPromptRequest, - MemoryPromptResponse, MemoryRecord, MemoryRecordResults, - SearchRequest, + ModelNameLiteral, SessionListResponse, WorkingMemory, - WorkingMemoryRequest, WorkingMemoryResponse, ) -# Model name literals for model-specific window sizes -ModelNameLiteral = Literal[ - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-4", - "gpt-4-32k", - "gpt-4o", - "gpt-4o-mini", - "o1", - "o1-mini", - "o3-mini", - "text-embedding-ada-002", - "text-embedding-3-small", - "text-embedding-3-large", - "claude-3-opus-20240229", - "claude-3-sonnet-20240229", - "claude-3-haiku-20240307", - "claude-3-5-sonnet-20240620", - "claude-3-7-sonnet-20250219", - "claude-3-5-sonnet-20241022", - "claude-3-5-haiku-20241022", - "claude-3-7-sonnet-latest", - "claude-3-5-sonnet-latest", - "claude-3-5-haiku-latest", - "claude-3-opus-latest", -] - - class MemoryClientConfig(BaseModel): """Configuration for the Memory API Client""" @@ -79,12 +49,13 @@ class MemoryClientConfig(BaseModel): class MemoryAPIClient: """ - Client for the Redis Memory Server REST API. + Client for the Agent Memory Server REST API. This client provides methods to interact with all server endpoints: - Health check - Session management (list, get, put, delete) - Long-term memory (create, search) + - Enhanced functionality (lifecycle, batch, pagination, validation) """ def __init__(self, config: MemoryClientConfig): @@ -112,6 +83,20 @@ async def __aexit__(self, exc_type, exc_val, exc_tb): """Close the client when exiting the context manager.""" await self.close() + def _handle_http_error(self, response: httpx.Response) -> None: + """Handle HTTP errors and convert to appropriate exceptions.""" + if response.status_code == 404: + from .exceptions import MemoryNotFoundError + + raise MemoryNotFoundError(f"Resource not found: {response.url}") + elif response.status_code >= 400: + try: + error_data = response.json() + message = error_data.get("detail", f"HTTP {response.status_code}") + except Exception: + message = f"HTTP {response.status_code}: {response.text}" + raise MemoryServerError(message, response.status_code) + async def health_check(self) -> HealthCheckResponse: """ Check the health of the memory server. 
@@ -119,9 +104,13 @@ async def health_check(self) -> HealthCheckResponse: Returns: HealthCheckResponse with current server timestamp """ - response = await self._client.get("/v1/health") - response.raise_for_status() - return HealthCheckResponse(**response.json()) + try: + response = await self._client.get("/v1/health") + response.raise_for_status() + return HealthCheckResponse(**response.json()) + except httpx.HTTPStatusError as e: + self._handle_http_error(e.response) + raise async def list_sessions( self, limit: int = 20, offset: int = 0, namespace: str | None = None @@ -146,9 +135,13 @@ async def list_sessions( elif self.config.default_namespace is not None: params["namespace"] = self.config.default_namespace - response = await self._client.get("/v1/working-memory/", params=params) - response.raise_for_status() - return SessionListResponse(**response.json()) + try: + response = await self._client.get("/v1/working-memory/", params=params) + response.raise_for_status() + return SessionListResponse(**response.json()) + except httpx.HTTPStatusError as e: + self._handle_http_error(e.response) + raise async def get_session_memory( self, @@ -172,7 +165,8 @@ async def get_session_memory( WorkingMemoryResponse containing messages, context and metadata Raises: - httpx.HTTPStatusError: If the session is not found (404) or other errors + MemoryNotFoundError: If the session is not found + MemoryServerError: For other server errors """ params = {} @@ -190,11 +184,15 @@ async def get_session_memory( if context_window_max is not None: params["context_window_max"] = context_window_max - response = await self._client.get( - f"/v1/working-memory/{session_id}", params=params - ) - response.raise_for_status() - return WorkingMemoryResponse(**response.json()) + try: + response = await self._client.get( + f"/v1/working-memory/{session_id}", params=params + ) + response.raise_for_status() + return WorkingMemoryResponse(**response.json()) + except httpx.HTTPStatusError as e: + self._handle_http_error(e.response) + raise async def put_session_memory( self, session_id: str, memory: WorkingMemory @@ -213,12 +211,16 @@ async def put_session_memory( if memory.namespace is None and self.config.default_namespace is not None: memory.namespace = self.config.default_namespace - response = await self._client.put( - f"/v1/working-memory/{session_id}", - json=memory.model_dump(exclude_none=True, mode="json"), - ) - response.raise_for_status() - return WorkingMemoryResponse(**response.json()) + try: + response = await self._client.put( + f"/v1/working-memory/{session_id}", + json=memory.model_dump(exclude_none=True, mode="json"), + ) + response.raise_for_status() + return WorkingMemoryResponse(**response.json()) + except httpx.HTTPStatusError as e: + self._handle_http_error(e.response) + raise async def delete_session_memory( self, session_id: str, namespace: str | None = None @@ -239,11 +241,15 @@ async def delete_session_memory( elif self.config.default_namespace is not None: params["namespace"] = self.config.default_namespace - response = await self._client.delete( - f"/v1/working-memory/{session_id}", params=params - ) - response.raise_for_status() - return AckResponse(**response.json()) + try: + response = await self._client.delete( + f"/v1/working-memory/{session_id}", params=params + ) + response.raise_for_status() + return AckResponse(**response.json()) + except httpx.HTTPStatusError as e: + self._handle_http_error(e.response) + raise async def set_working_memory_data( self, @@ -356,7 +362,7 @@ async def 
add_memories_to_working_memory( # Auto-generate IDs for memories that don't have them for memory in final_memories: if not memory.id: - memory.id = str(ULID()) + memory.id = str(ulid.new()) # Create new working memory with the memories working_memory = WorkingMemory( @@ -384,7 +390,7 @@ async def create_long_term_memory( AckResponse indicating success Raises: - httpx.HTTPStatusError: If long-term memory is disabled (400) or other errors + MemoryServerError: If long-term memory is disabled or other errors """ # Apply default namespace if needed if self.config.default_namespace is not None: @@ -392,13 +398,20 @@ async def create_long_term_memory( if memory.namespace is None: memory.namespace = self.config.default_namespace - payload = CreateMemoryRecordRequest(memories=memories) - response = await self._client.post( - "/v1/long-term-memory/", - json=payload.model_dump(exclude_none=True, mode="json"), - ) - response.raise_for_status() - return AckResponse(**response.json()) + payload = { + "memories": [m.model_dump(exclude_none=True, mode="json") for m in memories] + } + + try: + response = await self._client.post( + "/v1/long-term-memory/", + json=payload, + ) + response.raise_for_status() + return AckResponse(**response.json()) + except httpx.HTTPStatusError as e: + self._handle_http_error(e.response) + raise async def search_long_term_memory( self, @@ -428,6 +441,7 @@ async def search_long_term_memory( last_accessed: Optional last accessed date filter user_id: Optional user ID filter distance_threshold: Optional distance threshold for search results + memory_type: Optional memory type filter limit: Maximum number of results to return (default: 10) offset: Offset for pagination (default: 0) @@ -435,7 +449,7 @@ async def search_long_term_memory( MemoryRecordResults with matching memories and metadata Raises: - httpx.HTTPStatusError: If long-term memory is disabled (400) or other errors + MemoryServerError: If long-term memory is disabled or other errors """ # Convert dictionary filters to their proper filter objects if needed if isinstance(session_id, dict): @@ -459,27 +473,46 @@ async def search_long_term_memory( if namespace is None and self.config.default_namespace is not None: namespace = Namespace(eq=self.config.default_namespace) - payload = SearchRequest( - text=text, - session_id=session_id, - namespace=namespace, - topics=topics, - entities=entities, - created_at=created_at, - last_accessed=last_accessed, - user_id=user_id, - distance_threshold=distance_threshold, - memory_type=memory_type, - limit=limit, - offset=offset, - ) + payload = { + "text": text, + "limit": limit, + "offset": offset, + } - response = await self._client.post( - "/v1/long-term-memory/search", - json=payload.model_dump(exclude_none=True, mode="json"), - ) - response.raise_for_status() - return MemoryRecordResults(**response.json()) + # Add filters if provided + if session_id: + payload["session_id"] = session_id.model_dump(exclude_none=True) + if namespace: + payload["namespace"] = namespace.model_dump(exclude_none=True) + if topics: + payload["topics"] = topics.model_dump(exclude_none=True) + if entities: + payload["entities"] = entities.model_dump(exclude_none=True) + if created_at: + payload["created_at"] = created_at.model_dump( + exclude_none=True, mode="json" + ) + if last_accessed: + payload["last_accessed"] = last_accessed.model_dump( + exclude_none=True, mode="json" + ) + if user_id: + payload["user_id"] = user_id.model_dump(exclude_none=True) + if memory_type: + payload["memory_type"] = 
memory_type.model_dump(exclude_none=True) + if distance_threshold is not None: + payload["distance_threshold"] = distance_threshold + + try: + response = await self._client.post( + "/v1/long-term-memory/search", + json=payload, + ) + response.raise_for_status() + return MemoryRecordResults(**response.json()) + except httpx.HTTPStatusError as e: + self._handle_http_error(e.response) + raise async def search_memories( self, @@ -530,7 +563,7 @@ async def search_memories( MemoryRecordResults with matching memories from both memory types Raises: - httpx.HTTPStatusError: If the request fails + MemoryServerError: If the request fails """ # Convert dictionary filters to their proper filter objects if needed if isinstance(session_id, dict): @@ -554,222 +587,46 @@ async def search_memories( if namespace is None and self.config.default_namespace is not None: namespace = Namespace(eq=self.config.default_namespace) - payload = SearchRequest( - text=text, - session_id=session_id, - namespace=namespace, - topics=topics, - entities=entities, - created_at=created_at, - last_accessed=last_accessed, - user_id=user_id, - distance_threshold=distance_threshold, - memory_type=memory_type, - limit=limit, - offset=offset, - ) - - response = await self._client.post( - "/v1/memory/search", - json=payload.model_dump(exclude_none=True, mode="json"), - ) - response.raise_for_status() - return MemoryRecordResults(**response.json()) - - async def memory_prompt( - self, - query: str, - session_id: str | None = None, - namespace: str | None = None, - window_size: int | None = None, - model_name: ModelNameLiteral | None = None, - context_window_max: int | None = None, - long_term_search: SearchRequest | None = None, - ) -> MemoryPromptResponse: - """ - Hydrate a user query with memory context and return a prompt - ready to send to an LLM. - - This method can retrieve relevant session history and long-term memories - to provide context for the query. 
- - Args: - query: The user's query text - session_id: Optional session ID to retrieve history from - namespace: Optional namespace for session and long-term memories - window_size: Optional number of messages to include from session history - model_name: Optional model name to determine context window size - context_window_max: Optional direct specification of context window max tokens - long_term_search: Optional SearchRequest for specific long-term memory filtering - - Returns: - MemoryPromptResponse containing a list of messages with context + payload = { + "text": text, + "limit": limit, + "offset": offset, + } - Raises: - httpx.HTTPStatusError: If the request fails or if neither session_id nor long_term_search is provided - """ - # Prepare the request payload - session_params = None - if session_id is not None: - session_params = WorkingMemoryRequest( - session_id=session_id, - namespace=namespace or self.config.default_namespace, - window_size=window_size or 12, # Default from settings - model_name=model_name, - context_window_max=context_window_max, + # Add filters if provided + if session_id: + payload["session_id"] = session_id.model_dump(exclude_none=True) + if namespace: + payload["namespace"] = namespace.model_dump(exclude_none=True) + if topics: + payload["topics"] = topics.model_dump(exclude_none=True) + if entities: + payload["entities"] = entities.model_dump(exclude_none=True) + if created_at: + payload["created_at"] = created_at.model_dump( + exclude_none=True, mode="json" ) - - # If no explicit long_term_search is provided but we have a query, create a basic one - if long_term_search is None and query: - # Use default namespace from config if none provided - _namespace = None - if namespace is not None: - _namespace = Namespace(eq=namespace) - elif self.config.default_namespace is not None: - _namespace = Namespace(eq=self.config.default_namespace) - - long_term_search = SearchRequest( - text=query, - namespace=_namespace, + if last_accessed: + payload["last_accessed"] = last_accessed.model_dump( + exclude_none=True, mode="json" ) + if user_id: + payload["user_id"] = user_id.model_dump(exclude_none=True) + if memory_type: + payload["memory_type"] = memory_type.model_dump(exclude_none=True) + if distance_threshold is not None: + payload["distance_threshold"] = distance_threshold - # Create the request payload - payload = MemoryPromptRequest( - query=query, - session=session_params, - long_term_search=long_term_search, - ) - - # Make the API call - response = await self._client.post( - "/v1/memory/prompt", json=payload.model_dump(exclude_none=True, mode="json") - ) - response.raise_for_status() - data = response.json() - return MemoryPromptResponse(**data) - - async def hydrate_memory_prompt( - self, - query: str, - session_id: SessionId | dict[str, Any] | None = None, - namespace: Namespace | dict[str, Any] | None = None, - topics: Topics | dict[str, Any] | None = None, - entities: Entities | dict[str, Any] | None = None, - created_at: CreatedAt | dict[str, Any] | None = None, - last_accessed: LastAccessed | dict[str, Any] | None = None, - user_id: UserId | dict[str, Any] | None = None, - distance_threshold: float | None = None, - memory_type: MemoryType | dict[str, Any] | None = None, - limit: int = 10, - offset: int = 0, - window_size: int = 12, - model_name: ModelNameLiteral | None = None, - context_window_max: int | None = None, - ) -> MemoryPromptResponse: - """ - Hydrate a user query with relevant session history and long-term memories. 
- - This method enriches the user's query by retrieving: - 1. Context from the conversation session (if session_id is provided) - 2. Relevant long-term memories related to the query - - Args: - query: The user's query text - session_id: Optional filter for session ID - namespace: Optional filter for namespace - topics: Optional filter for topics in long-term memories - entities: Optional filter for entities in long-term memories - created_at: Optional filter for creation date - last_accessed: Optional filter for last access date - user_id: Optional filter for user ID - distance_threshold: Optional distance threshold for semantic search - memory_type: Optional filter for memory type - limit: Maximum number of long-term memory results (default: 10) - offset: Offset for pagination (default: 0) - window_size: Number of messages to include from session history (default: 12) - model_name: Optional model name to determine context window size - context_window_max: Optional direct specification of context window max tokens - - Returns: - MemoryPromptResponse containing a list of messages with context - - Raises: - httpx.HTTPStatusError: If the request fails - """ - # Convert dictionary filters to their proper filter objects if needed - if isinstance(session_id, dict): - session_id = SessionId(**session_id) - if isinstance(namespace, dict): - namespace = Namespace(**namespace) - if isinstance(topics, dict): - topics = Topics(**topics) - if isinstance(entities, dict): - entities = Entities(**entities) - if isinstance(created_at, dict): - created_at = CreatedAt(**created_at) - if isinstance(last_accessed, dict): - last_accessed = LastAccessed(**last_accessed) - if isinstance(user_id, dict): - user_id = UserId(**user_id) - if isinstance(memory_type, dict): - memory_type = MemoryType(**memory_type) - - # Apply default namespace if needed and no namespace filter specified - if namespace is None and self.config.default_namespace is not None: - namespace = Namespace(eq=self.config.default_namespace) - - # Extract session_id value if it exists - session_params = None - _session_id = None - if session_id and hasattr(session_id, "eq") and session_id.eq: - _session_id = session_id.eq - - if _session_id: - # Get namespace value if it exists - _namespace = None - if namespace and hasattr(namespace, "eq"): - _namespace = namespace.eq - elif self.config.default_namespace: - _namespace = self.config.default_namespace - - session_params = WorkingMemoryRequest( - session_id=_session_id, - namespace=_namespace, - window_size=window_size, - model_name=model_name, - context_window_max=context_window_max, + try: + response = await self._client.post( + "/v1/memory/search", + json=payload, ) - - # Create search request for long-term memory - search_payload = SearchRequest( - text=query, - session_id=session_id, - namespace=namespace, - topics=topics, - entities=entities, - created_at=created_at, - last_accessed=last_accessed, - user_id=user_id, - distance_threshold=distance_threshold, - memory_type=memory_type, - limit=limit, - offset=offset, - ) - - # Create the request payload - payload = MemoryPromptRequest( - query=query, - session=session_params, - long_term_search=search_payload, - ) - - # Make the API call - response = await self._client.post( - "/v1/memory/prompt", json=payload.model_dump(exclude_none=True, mode="json") - ) - response.raise_for_status() - data = response.json() - return MemoryPromptResponse(**data) + response.raise_for_status() + return MemoryRecordResults(**response.json()) + except 
httpx.HTTPStatusError as e: + self._handle_http_error(e.response) + raise # === Memory Lifecycle Management === @@ -977,8 +834,6 @@ async def search_all_memories( offset += batch_size - # === Client-Side Validation === - def validate_memory_record(self, memory: ClientMemoryRecord | MemoryRecord) -> None: """ Validate memory record before sending to server. @@ -991,20 +846,20 @@ def validate_memory_record(self, memory: ClientMemoryRecord | MemoryRecord) -> N - ID format is valid Raises: - ValueError: If validation fails with descriptive message + MemoryValidationError: If validation fails with descriptive message """ if not memory.text or not memory.text.strip(): - raise ValueError("Memory text cannot be empty") + raise MemoryValidationError("Memory text cannot be empty") if memory.memory_type not in [ "episodic", "semantic", "message", ]: - raise ValueError(f"Invalid memory type: {memory.memory_type}") + raise MemoryValidationError(f"Invalid memory type: {memory.memory_type}") if memory.id and not self._is_valid_ulid(memory.id): - raise ValueError(f"Invalid ID format: {memory.id}") + raise MemoryValidationError(f"Invalid ID format: {memory.id}") if ( hasattr(memory, "created_at") @@ -1014,7 +869,7 @@ def validate_memory_record(self, memory: ClientMemoryRecord | MemoryRecord) -> N try: datetime.fromisoformat(str(memory.created_at)) except ValueError as e: - raise ValueError( + raise MemoryValidationError( f"Invalid created_at format: {memory.created_at}" ) from e @@ -1026,7 +881,7 @@ def validate_memory_record(self, memory: ClientMemoryRecord | MemoryRecord) -> N try: datetime.fromisoformat(str(memory.last_accessed)) except ValueError as e: - raise ValueError( + raise MemoryValidationError( f"Invalid last_accessed format: {memory.last_accessed}" ) from e @@ -1048,33 +903,31 @@ def validate_search_filters(self, **filters) -> None: for key in filters: if key not in valid_filter_keys: - raise ValueError(f"Invalid filter key: {key}") + raise MemoryValidationError(f"Invalid filter key: {key}") if "limit" in filters and ( not isinstance(filters["limit"], int) or filters["limit"] <= 0 ): - raise ValueError("Limit must be a positive integer") + raise MemoryValidationError("Limit must be a positive integer") if "offset" in filters and ( not isinstance(filters["offset"], int) or filters["offset"] < 0 ): - raise ValueError("Offset must be a non-negative integer") + raise MemoryValidationError("Offset must be a non-negative integer") if "distance_threshold" in filters and ( not isinstance(filters["distance_threshold"], int | float) or filters["distance_threshold"] < 0 ): - raise ValueError("Distance threshold must be a non-negative number") + raise MemoryValidationError( + "Distance threshold must be a non-negative number" + ) - def _is_valid_ulid(self, ulid_str: str) -> bool: - """Check if a string is a valid ULID format.""" - try: - ULID.from_str(ulid_str) - return True - except ValueError: - return False + _ULID_REGEX = re.compile(r"[0-7][0-9A-HJKMNP-TV-Z]{25}") - # === Enhanced Convenience Methods === + def _is_valid_ulid(self, ulid_str: str) -> bool: + """Return True if a string looks like a valid Crockford-base32 ULID.""" + return bool(self._ULID_REGEX.fullmatch(ulid_str)) async def update_working_memory_data( self, @@ -1111,7 +964,7 @@ async def update_working_memory_data( elif merge_strategy == "deep_merge": final_data = self._deep_merge_dicts(existing_memory.data, data_updates) else: - raise ValueError(f"Invalid merge strategy: {merge_strategy}") + raise MemoryValidationError(f"Invalid merge 
strategy: {merge_strategy}") else: final_data = data_updates @@ -1156,10 +1009,32 @@ async def append_messages_to_working_memory( session_id=session_id, namespace=namespace ) - # Combine messages - final_messages = ( - existing_memory.messages if existing_memory else [] - ) + messages + # Combine messages - convert MemoryMessage objects to dicts if needed + existing_messages = existing_memory.messages if existing_memory else [] + + # Convert existing messages to dict format if they're objects + converted_existing_messages = [] + for msg in existing_messages: + if hasattr(msg, "model_dump"): + converted_existing_messages.append(msg.model_dump()) + elif hasattr(msg, "role") and hasattr(msg, "content"): + converted_existing_messages.append( + {"role": msg.role, "content": msg.content} + ) + else: + converted_existing_messages.append(msg) + + # Convert new messages to dict format if they're objects + new_messages = [] + for msg in messages: + if hasattr(msg, "model_dump"): + new_messages.append(msg.model_dump()) + elif hasattr(msg, "role") and hasattr(msg, "content"): + new_messages.append({"role": msg.role, "content": msg.content}) + else: + new_messages.append(msg) + + final_messages = converted_existing_messages + new_messages # Create updated working memory working_memory = WorkingMemory( @@ -1174,6 +1049,128 @@ async def append_messages_to_working_memory( return await self.put_session_memory(session_id, working_memory) + async def memory_prompt( + self, + query: str, + session_id: str | None = None, + namespace: str | None = None, + window_size: int | None = None, + model_name: str | None = None, + context_window_max: int | None = None, + long_term_search: dict[str, Any] | None = None, + ) -> dict[str, Any]: + """ + Hydrate a user query with memory context and return a prompt ready to send to an LLM. 
+ + Args: + query: The input text to find relevant context for + session_id: Optional session ID to include session messages + namespace: Optional namespace for the session + window_size: Optional number of messages to include + model_name: Optional model name to determine context window size + context_window_max: Optional direct specification of context window tokens + long_term_search: Optional search parameters for long-term memory + + Returns: + Dict with messages hydrated with relevant memory context + """ + payload = {"query": query} + + # Add session parameters if provided + if session_id is not None: + session_params = {"session_id": session_id} + if namespace is not None: + session_params["namespace"] = namespace + elif self.config.default_namespace is not None: + session_params["namespace"] = self.config.default_namespace + if window_size is not None: + session_params["window_size"] = window_size + if model_name is not None: + session_params["model_name"] = model_name + if context_window_max is not None: + session_params["context_window_max"] = context_window_max + payload["session"] = session_params + + # Add long-term search parameters if provided + if long_term_search is not None: + payload["long_term_search"] = long_term_search + + try: + response = await self._client.post( + "/v1/memory/prompt", + json=payload, + ) + response.raise_for_status() + return response.json() + except httpx.HTTPStatusError as e: + self._handle_http_error(e.response) + raise + + async def hydrate_memory_prompt( + self, + query: str, + session_id: dict[str, Any] | None = None, + namespace: dict[str, Any] | None = None, + topics: dict[str, Any] | None = None, + entities: dict[str, Any] | None = None, + created_at: dict[str, Any] | None = None, + last_accessed: dict[str, Any] | None = None, + user_id: dict[str, Any] | None = None, + distance_threshold: float | None = None, + memory_type: dict[str, Any] | None = None, + limit: int = 10, + ) -> dict[str, Any]: + """ + Hydrate a user query with long-term memory context using filters. + + This is a convenience method that creates a memory prompt using only + long-term memory search with the specified filters. 
+ + Args: + query: The input text to find relevant context for + session_id: Optional session ID filter (as dict) + namespace: Optional namespace filter (as dict) + topics: Optional topics filter (as dict) + entities: Optional entities filter (as dict) + created_at: Optional creation date filter (as dict) + last_accessed: Optional last accessed date filter (as dict) + user_id: Optional user ID filter (as dict) + distance_threshold: Optional distance threshold + memory_type: Optional memory type filter (as dict) + limit: Maximum number of long-term memories to include + + Returns: + Dict with messages hydrated with relevant long-term memories + """ + # Build long-term search parameters + long_term_search = {"limit": limit} + + if session_id is not None: + long_term_search["session_id"] = session_id + if namespace is not None: + long_term_search["namespace"] = namespace + elif self.config.default_namespace is not None: + long_term_search["namespace"] = {"eq": self.config.default_namespace} + if topics is not None: + long_term_search["topics"] = topics + if entities is not None: + long_term_search["entities"] = entities + if created_at is not None: + long_term_search["created_at"] = created_at + if last_accessed is not None: + long_term_search["last_accessed"] = last_accessed + if user_id is not None: + long_term_search["user_id"] = user_id + if distance_threshold is not None: + long_term_search["distance_threshold"] = distance_threshold + if memory_type is not None: + long_term_search["memory_type"] = memory_type + + return await self.memory_prompt( + query=query, + long_term_search=long_term_search, + ) + def _deep_merge_dicts(self, base: dict, updates: dict) -> dict: """Recursively merge two dictionaries.""" result = base.copy() @@ -1203,6 +1200,9 @@ async def create_memory_client( Returns: Initialized MemoryAPIClient instance + + Raises: + MemoryClientError: If unable to connect to the server """ config = MemoryClientConfig( base_url=base_url, @@ -1216,7 +1216,7 @@ async def create_memory_client( await client.health_check() except Exception as e: await client.close() - raise ConnectionError( + raise MemoryClientError( f"Failed to connect to memory server at {base_url}: {e}" ) from e diff --git a/agent-memory-client/agent_memory_client/exceptions.py b/agent-memory-client/agent_memory_client/exceptions.py new file mode 100644 index 0000000..8309b9c --- /dev/null +++ b/agent-memory-client/agent_memory_client/exceptions.py @@ -0,0 +1,34 @@ +""" +Exception classes for the Agent Memory Client. +""" + + +class MemoryClientError(Exception): + """Base exception for all memory client errors.""" + + pass + + +class MemoryValidationError(MemoryClientError, ValueError): + """Raised when memory record or filter validation fails. + + Subclassing ``ValueError`` ensures that client code (and our test suite) + can catch validation issues using the built-in exception while still + signaling a distinct, library-specific error type when desired. 
+ """ + + pass + + +class MemoryNotFoundError(MemoryClientError): + """Raised when a requested memory or session is not found.""" + + pass + + +class MemoryServerError(MemoryClientError): + """Raised when the memory server returns an error.""" + + def __init__(self, message: str, status_code: int | None = None): + super().__init__(message) + self.status_code = status_code diff --git a/agent-memory-client/agent_memory_client/filters.py b/agent-memory-client/agent_memory_client/filters.py new file mode 100644 index 0000000..d105d9c --- /dev/null +++ b/agent-memory-client/agent_memory_client/filters.py @@ -0,0 +1,91 @@ +""" +Filter classes for search operations. + +These filters allow for filtering memory search results. +""" + +from datetime import datetime + +from pydantic import BaseModel + + +class BaseFilter(BaseModel): + """Base class for all filters""" + + pass + + +class SessionId(BaseFilter): + """Filter by session ID""" + + eq: str | None = None + in_: list[str] | None = None + not_eq: str | None = None + not_in: list[str] | None = None + + +class Namespace(BaseFilter): + """Filter by namespace""" + + eq: str | None = None + in_: list[str] | None = None + not_eq: str | None = None + not_in: list[str] | None = None + + +class UserId(BaseFilter): + """Filter by user ID""" + + eq: str | None = None + in_: list[str] | None = None + not_eq: str | None = None + not_in: list[str] | None = None + + +class Topics(BaseFilter): + """Filter by topics""" + + any: list[str] | None = None + all: list[str] | None = None + none: list[str] | None = None + + +class Entities(BaseFilter): + """Filter by entities""" + + any: list[str] | None = None + all: list[str] | None = None + none: list[str] | None = None + + +class CreatedAt(BaseFilter): + """Filter by creation date""" + + gte: datetime | None = None + lte: datetime | None = None + eq: datetime | None = None + + +class LastAccessed(BaseFilter): + """Filter by last accessed date""" + + gte: datetime | None = None + lte: datetime | None = None + eq: datetime | None = None + + +class EventDate(BaseFilter): + """Filter by event date""" + + gte: datetime | None = None + lte: datetime | None = None + eq: datetime | None = None + + +class MemoryType(BaseFilter): + """Filter by memory type""" + + eq: str | None = None + in_: list[str] | None = None + not_eq: str | None = None + not_in: list[str] | None = None diff --git a/agent-memory-client/agent_memory_client/models.py b/agent-memory-client/agent_memory_client/models.py new file mode 100644 index 0000000..0b5b4a1 --- /dev/null +++ b/agent-memory-client/agent_memory_client/models.py @@ -0,0 +1,226 @@ +""" +Data models for the Agent Memory Client. + +This module contains essential data models needed by the client. +For full model definitions, see the main agent_memory_server package. 
+""" + +from datetime import UTC, datetime +from enum import Enum +from typing import Any, Literal + +import ulid +from pydantic import BaseModel, Field + +# Model name literals for model-specific window sizes +ModelNameLiteral = Literal[ + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-4", + "gpt-4-32k", + "gpt-4o", + "gpt-4o-mini", + "o1", + "o1-mini", + "o3-mini", + "text-embedding-ada-002", + "text-embedding-3-small", + "text-embedding-3-large", + "claude-3-opus-20240229", + "claude-3-sonnet-20240229", + "claude-3-haiku-20240307", + "claude-3-5-sonnet-20240620", + "claude-3-7-sonnet-20250219", + "claude-3-5-sonnet-20241022", + "claude-3-5-haiku-20241022", + "claude-3-7-sonnet-latest", + "claude-3-5-sonnet-latest", + "claude-3-5-haiku-latest", + "claude-3-opus-latest", +] + + +class MemoryTypeEnum(str, Enum): + """Enum for memory types with string values""" + + EPISODIC = "episodic" + SEMANTIC = "semantic" + MESSAGE = "message" + + +class MemoryMessage(BaseModel): + """A message in the memory system""" + + role: str + content: str + + +class MemoryRecord(BaseModel): + """A memory record""" + + id: str = Field(description="Client-provided ID for deduplication and overwrites") + text: str + session_id: str | None = Field( + default=None, + description="Optional session ID for the memory record", + ) + user_id: str | None = Field( + default=None, + description="Optional user ID for the memory record", + ) + namespace: str | None = Field( + default=None, + description="Optional namespace for the memory record", + ) + last_accessed: datetime = Field( + default_factory=lambda: datetime.now(UTC), + description="Datetime when the memory was last accessed", + ) + created_at: datetime = Field( + default_factory=lambda: datetime.now(UTC), + description="Datetime when the memory was created", + ) + updated_at: datetime = Field( + description="Datetime when the memory was last updated", + default_factory=lambda: datetime.now(UTC), + ) + topics: list[str] | None = Field( + default=None, + description="Optional topics for the memory record", + ) + entities: list[str] | None = Field( + default=None, + description="Optional entities for the memory record", + ) + memory_hash: str | None = Field( + default=None, + description="Hash representation of the memory for deduplication", + ) + discrete_memory_extracted: Literal["t", "f"] = Field( + default="f", + description="Whether memory extraction has run for this memory (only messages)", + ) + memory_type: MemoryTypeEnum = Field( + default=MemoryTypeEnum.MESSAGE, + description="Type of memory", + ) + persisted_at: datetime | None = Field( + default=None, + description="Server-assigned timestamp when memory was persisted to long-term storage", + ) + extracted_from: list[str] | None = Field( + default=None, + description="List of message IDs that this memory was extracted from", + ) + event_date: datetime | None = Field( + default=None, + description="Date/time when the event described in this memory occurred (primarily for episodic memories)", + ) + + +class ClientMemoryRecord(MemoryRecord): + """A memory record with a client-provided ID""" + + id: str = Field( + default_factory=lambda: str(ulid.new()), + description="Client-provided ID generated by the client (ULID)", + ) + + +JSONTypes = str | float | int | bool | list | dict + + +class WorkingMemory(BaseModel): + """Working memory for a session - contains both messages and structured memory records""" + + # Support both message-based memory (conversation) and structured memory records + messages: 
list[MemoryMessage] = Field( + default_factory=list, + description="Conversation messages (role/content pairs)", + ) + memories: list[MemoryRecord | ClientMemoryRecord] = Field( + default_factory=list, + description="Structured memory records for promotion to long-term storage", + ) + + # Arbitrary JSON data storage (separate from memories) + data: dict[str, JSONTypes] | None = Field( + default=None, + description="Arbitrary JSON data storage (key-value pairs)", + ) + + # Session context and metadata + context: str | None = Field( + default=None, + description="Optional summary of past session messages", + ) + user_id: str | None = Field( + default=None, + description="Optional user ID for the working memory", + ) + tokens: int = Field( + default=0, + description="Optional number of tokens in the working memory", + ) + + # Required session scoping + session_id: str + namespace: str | None = Field( + default=None, + description="Optional namespace for the working memory", + ) + + # TTL and timestamps + ttl_seconds: int = Field( + default=3600, # 1 hour default + description="TTL for the working memory in seconds", + ) + last_accessed: datetime = Field( + default_factory=lambda: datetime.now(UTC), + description="Datetime when the working memory was last accessed", + ) + + +class AckResponse(BaseModel): + """Generic acknowledgement response""" + + status: str + + +class HealthCheckResponse(BaseModel): + """Health check response""" + + now: float + + +class SessionListResponse(BaseModel): + """Response containing a list of sessions""" + + sessions: list[str] + total: int + + +class WorkingMemoryResponse(WorkingMemory): + """Response from working memory operations""" + + pass + + +class MemoryRecordResult(MemoryRecord): + """Result from a memory search""" + + dist: float + + +class MemoryRecordResults(BaseModel): + """Results from memory search operations""" + + memories: list[MemoryRecordResult] + total: int + next_offset: int | None = None + + +class MemoryPromptResponse(BaseModel): + """Response from memory prompt endpoint""" + + messages: list[dict[str, Any]] # Simplified to avoid MCP dependencies diff --git a/agent-memory-client/pyproject.toml b/agent-memory-client/pyproject.toml new file mode 100644 index 0000000..7970592 --- /dev/null +++ b/agent-memory-client/pyproject.toml @@ -0,0 +1,99 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "agent-memory-client" +dynamic = ["version"] +description = "Python client for the Agent Memory Server REST API" +readme = {file = "README.md", content-type = "text/markdown"} +license = "MIT" +requires-python = ">=3.10" +authors = [ + { name = "Agent Memory Server Team", email = "support@agentmemory.com" }, +] +classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Topic :: Software Development :: Libraries :: Python Modules", + "Typing :: Typed", + "Framework :: AsyncIO", + "Topic :: Scientific/Engineering :: Artificial Intelligence", +] +dependencies = [ + "httpx>=0.25.0", + "pydantic>=2.0.0", + "ulid-py>=1.1.0", +] + +[project.optional-dependencies] +dev = [ + "pytest>=7.0.0", + "pytest-asyncio>=0.21.0", + "pytest-httpx>=0.21.0", + "ruff>=0.1.0", + "mypy>=1.5.0", +] + +[project.urls] +Homepage = "https://github.com/redis-developer/agent-memory-server" 
+Repository = "https://github.com/redis-developer/agent-memory-server" +Documentation = "https://github.com/redis-developer/agent-memory-server/tree/main/docs" +Issues = "https://github.com/redis-developer/agent-memory-server/issues" + +[tool.hatch.version] +path = "agent_memory_client/__init__.py" + +[tool.hatch.build.targets.wheel] +packages = ["agent_memory_client"] + +[tool.ruff] +line-length = 88 +select = [ + "E", # pycodestyle errors + "W", # pycodestyle warnings + "F", # pyflakes + "I", # isort + "B", # flake8-bugbear + "C4", # flake8-comprehensions + "UP", # pyupgrade + "SIM", # flake8-simplify +] +ignore = [ + "E501", # line too long +] + +[tool.ruff.per-file-ignores] +"__init__.py" = ["F401"] + +[tool.pytest.ini_options] +asyncio_mode = "auto" +testpaths = ["tests"] +python_files = ["test_*.py"] +python_classes = ["Test*"] +python_functions = ["test_*"] + +[tool.mypy] +python_version = "3.10" +strict = true +warn_return_any = true +warn_unused_configs = true +disallow_untyped_defs = true +disallow_incomplete_defs = true +check_untyped_defs = true +disallow_untyped_decorators = true +no_implicit_optional = true +warn_redundant_casts = true +warn_unused_ignores = true +warn_no_return = true +warn_unreachable = true +strict_equality = true + +[[tool.mypy.overrides]] +module = "tests.*" +disallow_untyped_defs = false diff --git a/agent-memory-client/tests/test_basic.py b/agent-memory-client/tests/test_basic.py new file mode 100644 index 0000000..b69cbd8 --- /dev/null +++ b/agent-memory-client/tests/test_basic.py @@ -0,0 +1,98 @@ +""" +Basic tests for the Agent Memory Client package. +""" + +import pytest + +from agent_memory_client import ( + MemoryAPIClient, + MemoryClientConfig, + MemoryClientError, + MemoryValidationError, + create_memory_client, +) +from agent_memory_client.models import ClientMemoryRecord, MemoryTypeEnum + + +def test_imports(): + """Test that all essential imports work.""" + assert MemoryAPIClient is not None + assert MemoryClientConfig is not None + assert create_memory_client is not None + assert MemoryClientError is not None + assert MemoryValidationError is not None + + +def test_client_config(): + """Test client configuration.""" + config = MemoryClientConfig( + base_url="http://localhost:8000", timeout=30.0, default_namespace="test" + ) + + assert config.base_url == "http://localhost:8000" + assert config.timeout == 30.0 + assert config.default_namespace == "test" + + +def test_client_creation(): + """Test client creation.""" + config = MemoryClientConfig(base_url="http://localhost:8000") + client = MemoryAPIClient(config) + + assert client.config == config + assert client._client is not None + + +def test_memory_record_creation(): + """Test creating memory records.""" + memory = ClientMemoryRecord( + text="Test memory", + memory_type=MemoryTypeEnum.SEMANTIC, + topics=["test"], + user_id="test-user", + ) + + assert memory.text == "Test memory" + assert memory.memory_type == MemoryTypeEnum.SEMANTIC + assert memory.topics == ["test"] + assert memory.user_id == "test-user" + assert memory.id is not None # Should auto-generate + + +def test_validation_methods(): + """Test validation methods exist.""" + config = MemoryClientConfig(base_url="http://localhost:8000") + client = MemoryAPIClient(config) + + # Test that validation methods exist + assert hasattr(client, "validate_memory_record") + assert hasattr(client, "validate_search_filters") + assert hasattr(client, "_is_valid_ulid") + + +def test_enhanced_methods(): + """Test that enhanced methods exist.""" + 
config = MemoryClientConfig(base_url="http://localhost:8000") + client = MemoryAPIClient(config) + + # Test lifecycle management + assert hasattr(client, "promote_working_memories_to_long_term") + + # Test batch operations + assert hasattr(client, "bulk_create_long_term_memories") + + # Test pagination + assert hasattr(client, "search_all_long_term_memories") + assert hasattr(client, "search_all_memories") + + # Test enhanced convenience methods + assert hasattr(client, "update_working_memory_data") + assert hasattr(client, "append_messages_to_working_memory") + + +@pytest.mark.asyncio +async def test_create_memory_client_function(): + """Test the create_memory_client helper function.""" + # This will fail to connect, but we can test that it creates the client + with pytest.raises(MemoryClientError): + await create_memory_client("http://nonexistent:8000") diff --git a/agent-memory-client/tests/test_client.py b/agent-memory-client/tests/test_client.py new file mode 100644 index 0000000..01bf7c9 --- /dev/null +++ b/agent-memory-client/tests/test_client.py @@ -0,0 +1,684 @@ +""" +Test file for the enhanced Memory API Client functionality. + +Tests for new features like lifecycle management, batch operations, +pagination utilities, validation, and enhanced convenience methods. +""" + +import asyncio +from collections.abc import AsyncGenerator +from unittest.mock import AsyncMock, patch + +import httpx +import pytest + +from agent_memory_client import MemoryAPIClient, MemoryClientConfig +from agent_memory_client.models import ( + AckResponse, + ClientMemoryRecord, + MemoryMessage, + MemoryRecordResult, + MemoryRecordResults, + MemoryTypeEnum, + WorkingMemoryResponse, +) + + +@pytest.fixture +async def enhanced_test_client() -> AsyncGenerator[MemoryAPIClient, None]: + """Create a memory client for testing with mocked HTTP client.""" + config = MemoryClientConfig( + base_url="http://test", default_namespace="test-namespace" + ) + client = MemoryAPIClient(config) + + # Mock the HTTP client to avoid actual network calls + client._client = AsyncMock(spec=httpx.AsyncClient) + + yield client + + await client.close() + + +class TestMemoryLifecycleManagement: + """Tests for memory lifecycle management methods.""" + + @pytest.mark.asyncio + async def test_promote_working_memories_to_long_term(self, enhanced_test_client): + """Test promoting specific working memories to long-term storage.""" + session_id = "test-session" + + # Create test memories + memories = [ + ClientMemoryRecord( + id="memory-1", + text="User prefers dark mode", + memory_type=MemoryTypeEnum.SEMANTIC, + ), + ClientMemoryRecord( + id="memory-2", + text="User completed project setup", + memory_type=MemoryTypeEnum.EPISODIC, + ), + ] + + # Mock working memory response + working_memory_response = WorkingMemoryResponse( + session_id=session_id, + messages=[], + memories=memories, + data={}, + context=None, + user_id=None, + ) + + with ( + patch.object(enhanced_test_client, "get_session_memory") as mock_get, + patch.object( + enhanced_test_client, "create_long_term_memory" + ) as mock_create, + ): + mock_get.return_value = working_memory_response + mock_create.return_value = AckResponse(status="ok") + + # Test promoting all memories + result = await enhanced_test_client.promote_working_memories_to_long_term( + session_id=session_id + ) + + assert result.status == "ok" + mock_get.assert_called_once_with(session_id=session_id, namespace=None) + mock_create.assert_called_once_with(memories) + + @pytest.mark.asyncio + async def 
test_promote_specific_memory_ids(self, enhanced_test_client): + """Test promoting only specific memory IDs.""" + session_id = "test-session" + + memories = [ + ClientMemoryRecord( + id="memory-1", + text="User prefers dark mode", + memory_type=MemoryTypeEnum.SEMANTIC, + ), + ClientMemoryRecord( + id="memory-2", + text="User completed project setup", + memory_type=MemoryTypeEnum.EPISODIC, + ), + ] + + working_memory_response = WorkingMemoryResponse( + session_id=session_id, + messages=[], + memories=memories, + data={}, + context=None, + user_id=None, + ) + + with ( + patch.object(enhanced_test_client, "get_session_memory") as mock_get, + patch.object( + enhanced_test_client, "create_long_term_memory" + ) as mock_create, + ): + mock_get.return_value = working_memory_response + mock_create.return_value = AckResponse(status="ok") + + # Test promoting only specific memory + result = await enhanced_test_client.promote_working_memories_to_long_term( + session_id=session_id, memory_ids=["memory-1"] + ) + + assert result.status == "ok" + # Should only promote memory-1 + mock_create.assert_called_once() + promoted_memories = mock_create.call_args[0][0] + assert len(promoted_memories) == 1 + assert promoted_memories[0].id == "memory-1" + + @pytest.mark.asyncio + async def test_promote_no_memories(self, enhanced_test_client): + """Test promoting when no memories exist.""" + session_id = "test-session" + + working_memory_response = WorkingMemoryResponse( + session_id=session_id, + messages=[], + memories=[], + data={}, + context=None, + user_id=None, + ) + + with patch.object(enhanced_test_client, "get_session_memory") as mock_get: + mock_get.return_value = working_memory_response + + result = await enhanced_test_client.promote_working_memories_to_long_term( + session_id=session_id + ) + + assert result.status == "ok" + + +class TestBatchOperations: + """Tests for batch operations.""" + + @pytest.mark.asyncio + async def test_bulk_create_long_term_memories(self, enhanced_test_client): + """Test bulk creation of long-term memories with batching.""" + # Create test memory batches + batch1 = [ + ClientMemoryRecord( + text=f"Memory {i}", + memory_type=MemoryTypeEnum.SEMANTIC, + ) + for i in range(50) + ] + batch2 = [ + ClientMemoryRecord( + text=f"Memory {i}", + memory_type=MemoryTypeEnum.EPISODIC, + ) + for i in range(30) + ] + + memory_batches = [batch1, batch2] + + with patch.object( + enhanced_test_client, "create_long_term_memory" + ) as mock_create: + mock_create.return_value = AckResponse(status="ok") + + # Test with default batch size + results = await enhanced_test_client.bulk_create_long_term_memories( + memory_batches=memory_batches, + batch_size=25, + delay_between_batches=0, # No delay for test speed + ) + + # Should have created 4 batches: 25+25 for batch1, 25+5 for batch2 + assert len(results) == 4 + assert all(result.status == "ok" for result in results) + assert mock_create.call_count == 4 + + @pytest.mark.asyncio + async def test_bulk_create_with_delay(self, enhanced_test_client): + """Test bulk creation with rate limiting delay.""" + batch = [ + ClientMemoryRecord( + text="Test memory", + memory_type=MemoryTypeEnum.SEMANTIC, + ) + ] + + with ( + patch.object( + enhanced_test_client, "create_long_term_memory" + ) as mock_create, + patch("asyncio.sleep") as mock_sleep, + ): + mock_create.return_value = AckResponse(status="ok") + + asyncio.get_event_loop().time() + await enhanced_test_client.bulk_create_long_term_memories( + memory_batches=[batch], + delay_between_batches=0.1, + ) + + # 
Should have called sleep (though mocked) + mock_sleep.assert_called_with(0.1) + + +class TestPaginationUtilities: + """Tests for pagination utilities.""" + + @pytest.mark.asyncio + async def test_search_all_long_term_memories(self, enhanced_test_client): + """Test auto-paginating search for long-term memories.""" + # Mock responses for pagination + first_response = MemoryRecordResults( + total=150, + memories=[ + MemoryRecordResult( + id=f"memory-{i}", + text=f"Memory text {i}", + dist=0.1, + ) + for i in range(50) + ], + next_offset=50, + ) + + second_response = MemoryRecordResults( + total=150, + memories=[ + MemoryRecordResult( + id=f"memory-{i}", + text=f"Memory text {i}", + dist=0.1, + ) + for i in range(50, 100) + ], + next_offset=100, + ) + + third_response = MemoryRecordResults( + total=150, + memories=[ + MemoryRecordResult( + id=f"memory-{i}", + text=f"Memory text {i}", + dist=0.1, + ) + for i in range(100, 130) # Less than batch_size, indicating end + ], + next_offset=None, + ) + + with patch.object( + enhanced_test_client, "search_long_term_memory" + ) as mock_search: + mock_search.side_effect = [first_response, second_response, third_response] + + # Collect all results + all_memories = [] + async for memory in enhanced_test_client.search_all_long_term_memories( + text="test query", batch_size=50 + ): + all_memories.append(memory) + + # Should have retrieved all 130 memories + assert len(all_memories) == 130 + assert all_memories[0].id == "memory-0" + assert all_memories[-1].id == "memory-129" + + # Should have made 3 API calls + assert mock_search.call_count == 3 + + @pytest.mark.asyncio + async def test_search_all_memories(self, enhanced_test_client): + """Test auto-paginating unified memory search.""" + # Similar test for unified search + response = MemoryRecordResults( + total=25, + memories=[ + MemoryRecordResult( + id=f"memory-{i}", + text=f"Memory text {i}", + dist=0.1, + ) + for i in range(25) + ], + next_offset=None, + ) + + with patch.object(enhanced_test_client, "search_memories") as mock_search: + mock_search.return_value = response + + all_memories = [] + async for memory in enhanced_test_client.search_all_memories( + text="test query", batch_size=50 + ): + all_memories.append(memory) + + assert len(all_memories) == 25 + assert mock_search.call_count == 1 + + +class TestClientSideValidation: + """Tests for client-side validation methods.""" + + def test_validate_memory_record_success(self, enhanced_test_client): + """Test successful memory record validation.""" + memory = ClientMemoryRecord( + text="Valid memory text", + memory_type=MemoryTypeEnum.SEMANTIC, + id="01HN0000000000000000000000", # Valid ULID + ) + + # Should not raise + enhanced_test_client.validate_memory_record(memory) + + def test_validate_memory_record_empty_text(self, enhanced_test_client): + """Test validation failure for empty text.""" + memory = ClientMemoryRecord( + text="", + memory_type=MemoryTypeEnum.SEMANTIC, + ) + + with pytest.raises(ValueError, match="Memory text cannot be empty"): + enhanced_test_client.validate_memory_record(memory) + + def test_validate_memory_record_invalid_type(self, enhanced_test_client): + """Test validation failure for invalid memory type.""" + # Test with a valid memory but manually set invalid type + memory = ClientMemoryRecord( + text="Valid text", + memory_type=MemoryTypeEnum.SEMANTIC, + ) + # Manually override the memory type to test validation + memory.memory_type = "invalid_type" # type: ignore + + with pytest.raises(ValueError, match="Invalid memory 
type"): + enhanced_test_client.validate_memory_record(memory) + + def test_validate_memory_record_invalid_id(self, enhanced_test_client): + """Test validation failure for invalid ID format.""" + memory = ClientMemoryRecord( + text="Valid text", + memory_type=MemoryTypeEnum.SEMANTIC, + id="invalid-id-format", + ) + + with pytest.raises(ValueError, match="Invalid ID format"): + enhanced_test_client.validate_memory_record(memory) + + def test_validate_search_filters_success(self, enhanced_test_client): + """Test successful search filter validation.""" + filters = { + "limit": 10, + "offset": 0, + "distance_threshold": 0.5, + "session_id": "test-session", + } + + # Should not raise + enhanced_test_client.validate_search_filters(**filters) + + def test_validate_search_filters_invalid_key(self, enhanced_test_client): + """Test validation failure for invalid filter key.""" + filters = {"invalid_key": "value"} + + with pytest.raises(ValueError, match="Invalid filter key"): + enhanced_test_client.validate_search_filters(**filters) + + def test_validate_search_filters_invalid_limit(self, enhanced_test_client): + """Test validation failure for invalid limit.""" + filters = {"limit": -1} + + with pytest.raises(ValueError, match="Limit must be a positive integer"): + enhanced_test_client.validate_search_filters(**filters) + + def test_validate_search_filters_invalid_offset(self, enhanced_test_client): + """Test validation failure for invalid offset.""" + filters = {"offset": -1} + + with pytest.raises(ValueError, match="Offset must be a non-negative integer"): + enhanced_test_client.validate_search_filters(**filters) + + def test_validate_search_filters_invalid_distance(self, enhanced_test_client): + """Test validation failure for invalid distance threshold.""" + filters = {"distance_threshold": -0.5} + + with pytest.raises( + ValueError, match="Distance threshold must be a non-negative number" + ): + enhanced_test_client.validate_search_filters(**filters) + + +class TestEnhancedConvenienceMethods: + """Tests for enhanced convenience methods.""" + + @pytest.mark.asyncio + async def test_update_working_memory_data_merge(self, enhanced_test_client): + """Test updating working memory data with merge strategy.""" + session_id = "test-session" + + existing_memory = WorkingMemoryResponse( + session_id=session_id, + messages=[], + memories=[], + data={"existing_key": "existing_value", "shared_key": "old_value"}, + context=None, + user_id=None, + ) + + with ( + patch.object(enhanced_test_client, "get_session_memory") as mock_get, + patch.object(enhanced_test_client, "put_session_memory") as mock_put, + ): + mock_get.return_value = existing_memory + mock_put.return_value = existing_memory + + updates = {"new_key": "new_value", "shared_key": "new_value"} + + await enhanced_test_client.update_working_memory_data( + session_id=session_id, + data_updates=updates, + merge_strategy="merge", + ) + + # Check that put was called with merged data + mock_put.assert_called_once() + working_memory_arg = mock_put.call_args[0][1] + expected_data = { + "existing_key": "existing_value", + "shared_key": "new_value", + "new_key": "new_value", + } + assert working_memory_arg.data == expected_data + + @pytest.mark.asyncio + async def test_update_working_memory_data_replace(self, enhanced_test_client): + """Test updating working memory data with replace strategy.""" + session_id = "test-session" + + existing_memory = WorkingMemoryResponse( + session_id=session_id, + messages=[], + memories=[], + data={"existing_key": 
"existing_value"}, + context=None, + user_id=None, + ) + + with ( + patch.object(enhanced_test_client, "get_session_memory") as mock_get, + patch.object(enhanced_test_client, "put_session_memory") as mock_put, + ): + mock_get.return_value = existing_memory + mock_put.return_value = existing_memory + + updates = {"new_key": "new_value"} + + await enhanced_test_client.update_working_memory_data( + session_id=session_id, + data_updates=updates, + merge_strategy="replace", + ) + + # Check that put was called with replaced data + working_memory_arg = mock_put.call_args[0][1] + assert working_memory_arg.data == updates + + @pytest.mark.asyncio + async def test_update_working_memory_data_deep_merge(self, enhanced_test_client): + """Test updating working memory data with deep merge strategy.""" + session_id = "test-session" + + existing_memory = WorkingMemoryResponse( + session_id=session_id, + messages=[], + memories=[], + data={ + "nested": {"existing": "value", "shared": "old"}, + "top_level": "existing", + }, + context=None, + user_id=None, + ) + + with ( + patch.object(enhanced_test_client, "get_session_memory") as mock_get, + patch.object(enhanced_test_client, "put_session_memory") as mock_put, + ): + mock_get.return_value = existing_memory + mock_put.return_value = existing_memory + + updates = { + "nested": {"new": "value", "shared": "new"}, + "new_top": "new", + } + + await enhanced_test_client.update_working_memory_data( + session_id=session_id, + data_updates=updates, + merge_strategy="deep_merge", + ) + + # Check deep merge result + working_memory_arg = mock_put.call_args[0][1] + expected_data = { + "nested": {"existing": "value", "shared": "new", "new": "value"}, + "top_level": "existing", + "new_top": "new", + } + assert working_memory_arg.data == expected_data + + @pytest.mark.asyncio + async def test_append_messages_to_working_memory(self, enhanced_test_client): + """Test appending messages to existing working memory.""" + session_id = "test-session" + + existing_messages = [ + MemoryMessage(role="user", content="First message"), + ] + + existing_memory = WorkingMemoryResponse( + session_id=session_id, + messages=existing_messages, + memories=[], + data={}, + context=None, + user_id=None, + ) + + new_messages = [ + MemoryMessage(role="assistant", content="Second message"), + MemoryMessage(role="user", content="Third message"), + ] + + with ( + patch.object(enhanced_test_client, "get_session_memory") as mock_get, + patch.object(enhanced_test_client, "put_session_memory") as mock_put, + ): + mock_get.return_value = existing_memory + mock_put.return_value = existing_memory + + await enhanced_test_client.append_messages_to_working_memory( + session_id=session_id, + messages=new_messages, + ) + + # Check that messages were appended + working_memory_arg = mock_put.call_args[0][1] + assert len(working_memory_arg.messages) == 3 + assert working_memory_arg.messages[0].content == "First message" + assert working_memory_arg.messages[1].content == "Second message" + assert working_memory_arg.messages[2].content == "Third message" + + def test_deep_merge_dicts(self, enhanced_test_client): + """Test the deep merge dictionary utility method.""" + base = { + "a": {"nested": {"deep": "value1", "shared": "old"}}, + "b": "simple", + } + + updates = { + "a": {"nested": {"shared": "new", "additional": "value2"}}, + "c": "new_simple", + } + + result = enhanced_test_client._deep_merge_dicts(base, updates) + + expected = { + "a": { + "nested": { + "deep": "value1", + "shared": "new", + "additional": 
"value2", + } + }, + "b": "simple", + "c": "new_simple", + } + + assert result == expected + + def test_is_valid_ulid(self, enhanced_test_client): + """Test ULID validation utility method.""" + # Valid ULID + assert enhanced_test_client._is_valid_ulid("01HN0000000000000000000000") + + # Invalid ULID + assert not enhanced_test_client._is_valid_ulid("invalid-id") + assert not enhanced_test_client._is_valid_ulid("") + assert not enhanced_test_client._is_valid_ulid("too-short") + + +class TestErrorHandling: + """Tests for error handling in new methods.""" + + @pytest.mark.asyncio + async def test_bulk_create_handles_failures(self, enhanced_test_client): + """Test that bulk create handles individual batch failures.""" + batch = [ + ClientMemoryRecord( + text="Test memory", + memory_type=MemoryTypeEnum.SEMANTIC, + ) + ] + + with patch.object( + enhanced_test_client, "create_long_term_memory" + ) as mock_create: + # First call succeeds, second fails, third succeeds + mock_create.side_effect = [ + AckResponse(status="ok"), + Exception("API Error"), + AckResponse(status="ok"), + ] + + # Should raise the exception from the second batch + with pytest.raises(Exception, match="API Error"): + await enhanced_test_client.bulk_create_long_term_memories( + memory_batches=[batch, batch, batch], + delay_between_batches=0, + ) + + @pytest.mark.asyncio + async def test_pagination_handles_empty_results(self, enhanced_test_client): + """Test pagination utilities handle empty result sets.""" + empty_response = MemoryRecordResults( + total=0, + memories=[], + next_offset=None, + ) + + with patch.object( + enhanced_test_client, "search_long_term_memory" + ) as mock_search: + mock_search.return_value = empty_response + + # Should handle empty results gracefully + all_memories = [] + async for memory in enhanced_test_client.search_all_long_term_memories( + text="test query" + ): + all_memories.append(memory) + + assert len(all_memories) == 0 + assert mock_search.call_count == 1 + + def test_validation_with_none_values(self, enhanced_test_client): + """Test validation handles None values appropriately.""" + memory = ClientMemoryRecord( + text="Valid text", + memory_type=MemoryTypeEnum.SEMANTIC, + ) + # ClientMemoryRecord generates a ULID ID by default, so this should pass + + # Should not raise + enhanced_test_client.validate_memory_record(memory) diff --git a/agent-memory-client/uv.lock b/agent-memory-client/uv.lock new file mode 100644 index 0000000..f205d4e --- /dev/null +++ b/agent-memory-client/uv.lock @@ -0,0 +1,473 @@ +version = 1 +requires-python = ">=3.10" + +[[package]] +name = "agent-memory-client" +source = { editable = "." 
} +dependencies = [ + { name = "httpx" }, + { name = "pydantic" }, + { name = "ulid-py" }, +] + +[package.optional-dependencies] +dev = [ + { name = "mypy" }, + { name = "pytest" }, + { name = "pytest-asyncio" }, + { name = "pytest-httpx" }, + { name = "ruff" }, +] + +[package.metadata] +requires-dist = [ + { name = "httpx", specifier = ">=0.25.0" }, + { name = "mypy", marker = "extra == 'dev'", specifier = ">=1.5.0" }, + { name = "pydantic", specifier = ">=2.0.0" }, + { name = "pytest", marker = "extra == 'dev'", specifier = ">=7.0.0" }, + { name = "pytest-asyncio", marker = "extra == 'dev'", specifier = ">=0.21.0" }, + { name = "pytest-httpx", marker = "extra == 'dev'", specifier = ">=0.21.0" }, + { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.1.0" }, + { name = "ulid-py", specifier = ">=1.1.0" }, +] + +[[package]] +name = "annotated-types" +version = "0.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ee/67/531ea369ba64dcff5ec9c3402f9f51bf748cec26dde048a2f973a4eea7f5/annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89", size = 16081 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643 }, +] + +[[package]] +name = "anyio" +version = "4.9.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "exceptiongroup", marker = "python_full_version < '3.11'" }, + { name = "idna" }, + { name = "sniffio" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/95/7d/4c1bd541d4dffa1b52bd83fb8527089e097a106fc90b467a7313b105f840/anyio-4.9.0.tar.gz", hash = "sha256:673c0c244e15788651a4ff38710fea9675823028a6f08a5eda409e0c9840a028", size = 190949 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a1/ee/48ca1a7c89ffec8b6a0c5d02b89c305671d5ffd8d3c94acf8b8c408575bb/anyio-4.9.0-py3-none-any.whl", hash = "sha256:9f76d541cad6e36af7beb62e978876f3b41e3e04f2c1fbf0884604c0a9c4d93c", size = 100916 }, +] + +[[package]] +name = "certifi" +version = "2025.4.26" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e8/9e/c05b3920a3b7d20d3d3310465f50348e5b3694f4f88c6daf736eef3024c4/certifi-2025.4.26.tar.gz", hash = "sha256:0a816057ea3cdefcef70270d2c515e4506bbc954f417fa5ade2021213bb8f0c6", size = 160705 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4a/7e/3db2bd1b1f9e95f7cddca6d6e75e2f2bd9f51b1246e546d88addca0106bd/certifi-2025.4.26-py3-none-any.whl", hash = "sha256:30350364dfe371162649852c63336a15c70c6510c2ad5015b21c2345311805f3", size = 159618 }, +] + +[[package]] +name = "colorama" +version = "0.4.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335 }, +] + +[[package]] +name = 
"exceptiongroup" +version = "1.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/0b/9f/a65090624ecf468cdca03533906e7c69ed7588582240cfe7cc9e770b50eb/exceptiongroup-1.3.0.tar.gz", hash = "sha256:b241f5885f560bc56a59ee63ca4c6a8bfa46ae4ad651af316d4e81817bb9fd88", size = 29749 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/36/f4/c6e662dade71f56cd2f3735141b265c3c79293c109549c1e6933b0651ffc/exceptiongroup-1.3.0-py3-none-any.whl", hash = "sha256:4d111e6e0c13d0644cad6ddaa7ed0261a0b36971f6d23e7ec9b4b9097da78a10", size = 16674 }, +] + +[[package]] +name = "h11" +version = "0.16.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/01/ee/02a2c011bdab74c6fb3c75474d40b3052059d95df7e73351460c8588d963/h11-0.16.0.tar.gz", hash = "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1", size = 101250 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515 }, +] + +[[package]] +name = "httpcore" +version = "1.0.9" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "h11" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/06/94/82699a10bca87a5556c9c59b5963f2d039dbd239f25bc2a63907a05a14cb/httpcore-1.0.9.tar.gz", hash = "sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8", size = 85484 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55", size = 78784 }, +] + +[[package]] +name = "httpx" +version = "0.28.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "certifi" }, + { name = "httpcore" }, + { name = "idna" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b1/df/48c586a5fe32a0f01324ee087459e112ebb7224f646c0b5023f5e79e9956/httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc", size = 141406 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517 }, +] + +[[package]] +name = "idna" +version = "3.10" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f1/70/7703c29685631f5a7590aa73f1f1d3fa9a380e654b86af429e0934a32f7d/idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9", size = 190490 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3", size = 70442 }, +] + +[[package]] +name = "iniconfig" +version = "2.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/f2/97/ebf4da567aa6827c909642694d71c9fcf53e5b504f2d96afea02718862f3/iniconfig-2.1.0.tar.gz", hash = "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7", size = 4793 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2c/e1/e6716421ea10d38022b952c159d5161ca1193197fb744506875fbb87ea7b/iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760", size = 6050 }, +] + +[[package]] +name = "mypy" +version = "1.16.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "mypy-extensions" }, + { name = "pathspec" }, + { name = "tomli", marker = "python_full_version < '3.11'" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d4/38/13c2f1abae94d5ea0354e146b95a1be9b2137a0d506728e0da037c4276f6/mypy-1.16.0.tar.gz", hash = "sha256:84b94283f817e2aa6350a14b4a8fb2a35a53c286f97c9d30f53b63620e7af8ab", size = 3323139 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/64/5e/a0485f0608a3d67029d3d73cec209278b025e3493a3acfda3ef3a88540fd/mypy-1.16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7909541fef256527e5ee9c0a7e2aeed78b6cda72ba44298d1334fe7881b05c5c", size = 10967416 }, + { url = "https://files.pythonhosted.org/packages/4b/53/5837c221f74c0d53a4bfc3003296f8179c3a2a7f336d7de7bbafbe96b688/mypy-1.16.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e71d6f0090c2256c713ed3d52711d01859c82608b5d68d4fa01a3fe30df95571", size = 10087654 }, + { url = "https://files.pythonhosted.org/packages/29/59/5fd2400352c3093bed4c09017fe671d26bc5bb7e6ef2d4bf85f2a2488104/mypy-1.16.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:936ccfdd749af4766be824268bfe22d1db9eb2f34a3ea1d00ffbe5b5265f5491", size = 11875192 }, + { url = "https://files.pythonhosted.org/packages/ad/3e/4bfec74663a64c2012f3e278dbc29ffe82b121bc551758590d1b6449ec0c/mypy-1.16.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4086883a73166631307fdd330c4a9080ce24913d4f4c5ec596c601b3a4bdd777", size = 12612939 }, + { url = "https://files.pythonhosted.org/packages/88/1f/fecbe3dcba4bf2ca34c26ca016383a9676711907f8db4da8354925cbb08f/mypy-1.16.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:feec38097f71797da0231997e0de3a58108c51845399669ebc532c815f93866b", size = 12874719 }, + { url = "https://files.pythonhosted.org/packages/f3/51/c2d280601cd816c43dfa512a759270d5a5ef638d7ac9bea9134c8305a12f/mypy-1.16.0-cp310-cp310-win_amd64.whl", hash = "sha256:09a8da6a0ee9a9770b8ff61b39c0bb07971cda90e7297f4213741b48a0cc8d93", size = 9487053 }, + { url = "https://files.pythonhosted.org/packages/24/c4/ff2f79db7075c274fe85b5fff8797d29c6b61b8854c39e3b7feb556aa377/mypy-1.16.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:9f826aaa7ff8443bac6a494cf743f591488ea940dd360e7dd330e30dd772a5ab", size = 10884498 }, + { url = "https://files.pythonhosted.org/packages/02/07/12198e83006235f10f6a7808917376b5d6240a2fd5dce740fe5d2ebf3247/mypy-1.16.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:82d056e6faa508501af333a6af192c700b33e15865bda49611e3d7d8358ebea2", size = 10011755 }, + { url = "https://files.pythonhosted.org/packages/f1/9b/5fd5801a72b5d6fb6ec0105ea1d0e01ab2d4971893076e558d4b6d6b5f80/mypy-1.16.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:089bedc02307c2548eb51f426e085546db1fa7dd87fbb7c9fa561575cf6eb1ff", 
size = 11800138 }, + { url = "https://files.pythonhosted.org/packages/2e/81/a117441ea5dfc3746431e51d78a4aca569c677aa225bca2cc05a7c239b61/mypy-1.16.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6a2322896003ba66bbd1318c10d3afdfe24e78ef12ea10e2acd985e9d684a666", size = 12533156 }, + { url = "https://files.pythonhosted.org/packages/3f/38/88ec57c6c86014d3f06251e00f397b5a7daa6888884d0abf187e4f5f587f/mypy-1.16.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:021a68568082c5b36e977d54e8f1de978baf401a33884ffcea09bd8e88a98f4c", size = 12742426 }, + { url = "https://files.pythonhosted.org/packages/bd/53/7e9d528433d56e6f6f77ccf24af6ce570986c2d98a5839e4c2009ef47283/mypy-1.16.0-cp311-cp311-win_amd64.whl", hash = "sha256:54066fed302d83bf5128632d05b4ec68412e1f03ef2c300434057d66866cea4b", size = 9478319 }, + { url = "https://files.pythonhosted.org/packages/70/cf/158e5055e60ca2be23aec54a3010f89dcffd788732634b344fc9cb1e85a0/mypy-1.16.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:c5436d11e89a3ad16ce8afe752f0f373ae9620841c50883dc96f8b8805620b13", size = 11062927 }, + { url = "https://files.pythonhosted.org/packages/94/34/cfff7a56be1609f5d10ef386342ce3494158e4d506516890142007e6472c/mypy-1.16.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f2622af30bf01d8fc36466231bdd203d120d7a599a6d88fb22bdcb9dbff84090", size = 10083082 }, + { url = "https://files.pythonhosted.org/packages/b3/7f/7242062ec6288c33d8ad89574df87c3903d394870e5e6ba1699317a65075/mypy-1.16.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d045d33c284e10a038f5e29faca055b90eee87da3fc63b8889085744ebabb5a1", size = 11828306 }, + { url = "https://files.pythonhosted.org/packages/6f/5f/b392f7b4f659f5b619ce5994c5c43caab3d80df2296ae54fa888b3d17f5a/mypy-1.16.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b4968f14f44c62e2ec4a038c8797a87315be8df7740dc3ee8d3bfe1c6bf5dba8", size = 12702764 }, + { url = "https://files.pythonhosted.org/packages/9b/c0/7646ef3a00fa39ac9bc0938626d9ff29d19d733011be929cfea59d82d136/mypy-1.16.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:eb14a4a871bb8efb1e4a50360d4e3c8d6c601e7a31028a2c79f9bb659b63d730", size = 12896233 }, + { url = "https://files.pythonhosted.org/packages/6d/38/52f4b808b3fef7f0ef840ee8ff6ce5b5d77381e65425758d515cdd4f5bb5/mypy-1.16.0-cp312-cp312-win_amd64.whl", hash = "sha256:bd4e1ebe126152a7bbaa4daedd781c90c8f9643c79b9748caa270ad542f12bec", size = 9565547 }, + { url = "https://files.pythonhosted.org/packages/97/9c/ca03bdbefbaa03b264b9318a98950a9c683e06472226b55472f96ebbc53d/mypy-1.16.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:a9e056237c89f1587a3be1a3a70a06a698d25e2479b9a2f57325ddaaffc3567b", size = 11059753 }, + { url = "https://files.pythonhosted.org/packages/36/92/79a969b8302cfe316027c88f7dc6fee70129490a370b3f6eb11d777749d0/mypy-1.16.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:0b07e107affb9ee6ce1f342c07f51552d126c32cd62955f59a7db94a51ad12c0", size = 10073338 }, + { url = "https://files.pythonhosted.org/packages/14/9b/a943f09319167da0552d5cd722104096a9c99270719b1afeea60d11610aa/mypy-1.16.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c6fb60cbd85dc65d4d63d37cb5c86f4e3a301ec605f606ae3a9173e5cf34997b", size = 11827764 }, + { url = 
"https://files.pythonhosted.org/packages/ec/64/ff75e71c65a0cb6ee737287c7913ea155845a556c64144c65b811afdb9c7/mypy-1.16.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a7e32297a437cc915599e0578fa6bc68ae6a8dc059c9e009c628e1c47f91495d", size = 12701356 }, + { url = "https://files.pythonhosted.org/packages/0a/ad/0e93c18987a1182c350f7a5fab70550852f9fabe30ecb63bfbe51b602074/mypy-1.16.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:afe420c9380ccec31e744e8baff0d406c846683681025db3531b32db56962d52", size = 12900745 }, + { url = "https://files.pythonhosted.org/packages/28/5d/036c278d7a013e97e33f08c047fe5583ab4f1fc47c9a49f985f1cdd2a2d7/mypy-1.16.0-cp313-cp313-win_amd64.whl", hash = "sha256:55f9076c6ce55dd3f8cd0c6fff26a008ca8e5131b89d5ba6d86bd3f47e736eeb", size = 9572200 }, + { url = "https://files.pythonhosted.org/packages/99/a3/6ed10530dec8e0fdc890d81361260c9ef1f5e5c217ad8c9b21ecb2b8366b/mypy-1.16.0-py3-none-any.whl", hash = "sha256:29e1499864a3888bca5c1542f2d7232c6e586295183320caa95758fc84034031", size = 2265773 }, +] + +[[package]] +name = "mypy-extensions" +version = "1.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/6e/371856a3fb9d31ca8dac321cda606860fa4548858c0cc45d9d1d4ca2628b/mypy_extensions-1.1.0.tar.gz", hash = "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558", size = 6343 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963 }, +] + +[[package]] +name = "packaging" +version = "25.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a1/d4/1fc4078c65507b51b96ca8f8c3ba19e6a61c8253c72794544580a7b6c24d/packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f", size = 165727 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469 }, +] + +[[package]] +name = "pathspec" +version = "0.12.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ca/bc/f35b8446f4531a7cb215605d100cd88b7ac6f44ab3fc94870c120ab3adbf/pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712", size = 51043 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cc/20/ff623b09d963f88bfde16306a54e12ee5ea43e9b597108672ff3a408aad6/pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08", size = 31191 }, +] + +[[package]] +name = "pluggy" +version = "1.6.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538 
}, +] + +[[package]] +name = "pydantic" +version = "2.11.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "annotated-types" }, + { name = "pydantic-core" }, + { name = "typing-extensions" }, + { name = "typing-inspection" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f0/86/8ce9040065e8f924d642c58e4a344e33163a07f6b57f836d0d734e0ad3fb/pydantic-2.11.5.tar.gz", hash = "sha256:7f853db3d0ce78ce8bbb148c401c2cdd6431b3473c0cdff2755c7690952a7b7a", size = 787102 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b5/69/831ed22b38ff9b4b64b66569f0e5b7b97cf3638346eb95a2147fdb49ad5f/pydantic-2.11.5-py3-none-any.whl", hash = "sha256:f9c26ba06f9747749ca1e5c94d6a85cb84254577553c8785576fd38fa64dc0f7", size = 444229 }, +] + +[[package]] +name = "pydantic-core" +version = "2.33.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ad/88/5f2260bdfae97aabf98f1778d43f69574390ad787afb646292a638c923d4/pydantic_core-2.33.2.tar.gz", hash = "sha256:7cb8bc3605c29176e1b105350d2e6474142d7c1bd1d9327c4a9bdb46bf827acc", size = 435195 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e5/92/b31726561b5dae176c2d2c2dc43a9c5bfba5d32f96f8b4c0a600dd492447/pydantic_core-2.33.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:2b3d326aaef0c0399d9afffeb6367d5e26ddc24d351dbc9c636840ac355dc5d8", size = 2028817 }, + { url = "https://files.pythonhosted.org/packages/a3/44/3f0b95fafdaca04a483c4e685fe437c6891001bf3ce8b2fded82b9ea3aa1/pydantic_core-2.33.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0e5b2671f05ba48b94cb90ce55d8bdcaaedb8ba00cc5359f6810fc918713983d", size = 1861357 }, + { url = "https://files.pythonhosted.org/packages/30/97/e8f13b55766234caae05372826e8e4b3b96e7b248be3157f53237682e43c/pydantic_core-2.33.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0069c9acc3f3981b9ff4cdfaf088e98d83440a4c7ea1bc07460af3d4dc22e72d", size = 1898011 }, + { url = "https://files.pythonhosted.org/packages/9b/a3/99c48cf7bafc991cc3ee66fd544c0aae8dc907b752f1dad2d79b1b5a471f/pydantic_core-2.33.2-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d53b22f2032c42eaaf025f7c40c2e3b94568ae077a606f006d206a463bc69572", size = 1982730 }, + { url = "https://files.pythonhosted.org/packages/de/8e/a5b882ec4307010a840fb8b58bd9bf65d1840c92eae7534c7441709bf54b/pydantic_core-2.33.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0405262705a123b7ce9f0b92f123334d67b70fd1f20a9372b907ce1080c7ba02", size = 2136178 }, + { url = "https://files.pythonhosted.org/packages/e4/bb/71e35fc3ed05af6834e890edb75968e2802fe98778971ab5cba20a162315/pydantic_core-2.33.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4b25d91e288e2c4e0662b8038a28c6a07eaac3e196cfc4ff69de4ea3db992a1b", size = 2736462 }, + { url = "https://files.pythonhosted.org/packages/31/0d/c8f7593e6bc7066289bbc366f2235701dcbebcd1ff0ef8e64f6f239fb47d/pydantic_core-2.33.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6bdfe4b3789761f3bcb4b1ddf33355a71079858958e3a552f16d5af19768fef2", size = 2005652 }, + { url = "https://files.pythonhosted.org/packages/d2/7a/996d8bd75f3eda405e3dd219ff5ff0a283cd8e34add39d8ef9157e722867/pydantic_core-2.33.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:efec8db3266b76ef9607c2c4c419bdb06bf335ae433b80816089ea7585816f6a", size = 2113306 }, + 
{ url = "https://files.pythonhosted.org/packages/ff/84/daf2a6fb2db40ffda6578a7e8c5a6e9c8affb251a05c233ae37098118788/pydantic_core-2.33.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:031c57d67ca86902726e0fae2214ce6770bbe2f710dc33063187a68744a5ecac", size = 2073720 }, + { url = "https://files.pythonhosted.org/packages/77/fb/2258da019f4825128445ae79456a5499c032b55849dbd5bed78c95ccf163/pydantic_core-2.33.2-cp310-cp310-musllinux_1_1_armv7l.whl", hash = "sha256:f8de619080e944347f5f20de29a975c2d815d9ddd8be9b9b7268e2e3ef68605a", size = 2244915 }, + { url = "https://files.pythonhosted.org/packages/d8/7a/925ff73756031289468326e355b6fa8316960d0d65f8b5d6b3a3e7866de7/pydantic_core-2.33.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:73662edf539e72a9440129f231ed3757faab89630d291b784ca99237fb94db2b", size = 2241884 }, + { url = "https://files.pythonhosted.org/packages/0b/b0/249ee6d2646f1cdadcb813805fe76265745c4010cf20a8eba7b0e639d9b2/pydantic_core-2.33.2-cp310-cp310-win32.whl", hash = "sha256:0a39979dcbb70998b0e505fb1556a1d550a0781463ce84ebf915ba293ccb7e22", size = 1910496 }, + { url = "https://files.pythonhosted.org/packages/66/ff/172ba8f12a42d4b552917aa65d1f2328990d3ccfc01d5b7c943ec084299f/pydantic_core-2.33.2-cp310-cp310-win_amd64.whl", hash = "sha256:b0379a2b24882fef529ec3b4987cb5d003b9cda32256024e6fe1586ac45fc640", size = 1955019 }, + { url = "https://files.pythonhosted.org/packages/3f/8d/71db63483d518cbbf290261a1fc2839d17ff89fce7089e08cad07ccfce67/pydantic_core-2.33.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:4c5b0a576fb381edd6d27f0a85915c6daf2f8138dc5c267a57c08a62900758c7", size = 2028584 }, + { url = "https://files.pythonhosted.org/packages/24/2f/3cfa7244ae292dd850989f328722d2aef313f74ffc471184dc509e1e4e5a/pydantic_core-2.33.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e799c050df38a639db758c617ec771fd8fb7a5f8eaaa4b27b101f266b216a246", size = 1855071 }, + { url = "https://files.pythonhosted.org/packages/b3/d3/4ae42d33f5e3f50dd467761304be2fa0a9417fbf09735bc2cce003480f2a/pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dc46a01bf8d62f227d5ecee74178ffc448ff4e5197c756331f71efcc66dc980f", size = 1897823 }, + { url = "https://files.pythonhosted.org/packages/f4/f3/aa5976e8352b7695ff808599794b1fba2a9ae2ee954a3426855935799488/pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a144d4f717285c6d9234a66778059f33a89096dfb9b39117663fd8413d582dcc", size = 1983792 }, + { url = "https://files.pythonhosted.org/packages/d5/7a/cda9b5a23c552037717f2b2a5257e9b2bfe45e687386df9591eff7b46d28/pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:73cf6373c21bc80b2e0dc88444f41ae60b2f070ed02095754eb5a01df12256de", size = 2136338 }, + { url = "https://files.pythonhosted.org/packages/2b/9f/b8f9ec8dd1417eb9da784e91e1667d58a2a4a7b7b34cf4af765ef663a7e5/pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3dc625f4aa79713512d1976fe9f0bc99f706a9dee21dfd1810b4bbbf228d0e8a", size = 2730998 }, + { url = "https://files.pythonhosted.org/packages/47/bc/cd720e078576bdb8255d5032c5d63ee5c0bf4b7173dd955185a1d658c456/pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:881b21b5549499972441da4758d662aeea93f1923f953e9cbaff14b8b9565aef", size = 2003200 }, + { url = 
"https://files.pythonhosted.org/packages/ca/22/3602b895ee2cd29d11a2b349372446ae9727c32e78a94b3d588a40fdf187/pydantic_core-2.33.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:bdc25f3681f7b78572699569514036afe3c243bc3059d3942624e936ec93450e", size = 2113890 }, + { url = "https://files.pythonhosted.org/packages/ff/e6/e3c5908c03cf00d629eb38393a98fccc38ee0ce8ecce32f69fc7d7b558a7/pydantic_core-2.33.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:fe5b32187cbc0c862ee201ad66c30cf218e5ed468ec8dc1cf49dec66e160cc4d", size = 2073359 }, + { url = "https://files.pythonhosted.org/packages/12/e7/6a36a07c59ebefc8777d1ffdaf5ae71b06b21952582e4b07eba88a421c79/pydantic_core-2.33.2-cp311-cp311-musllinux_1_1_armv7l.whl", hash = "sha256:bc7aee6f634a6f4a95676fcb5d6559a2c2a390330098dba5e5a5f28a2e4ada30", size = 2245883 }, + { url = "https://files.pythonhosted.org/packages/16/3f/59b3187aaa6cc0c1e6616e8045b284de2b6a87b027cce2ffcea073adf1d2/pydantic_core-2.33.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:235f45e5dbcccf6bd99f9f472858849f73d11120d76ea8707115415f8e5ebebf", size = 2241074 }, + { url = "https://files.pythonhosted.org/packages/e0/ed/55532bb88f674d5d8f67ab121a2a13c385df382de2a1677f30ad385f7438/pydantic_core-2.33.2-cp311-cp311-win32.whl", hash = "sha256:6368900c2d3ef09b69cb0b913f9f8263b03786e5b2a387706c5afb66800efd51", size = 1910538 }, + { url = "https://files.pythonhosted.org/packages/fe/1b/25b7cccd4519c0b23c2dd636ad39d381abf113085ce4f7bec2b0dc755eb1/pydantic_core-2.33.2-cp311-cp311-win_amd64.whl", hash = "sha256:1e063337ef9e9820c77acc768546325ebe04ee38b08703244c1309cccc4f1bab", size = 1952909 }, + { url = "https://files.pythonhosted.org/packages/49/a9/d809358e49126438055884c4366a1f6227f0f84f635a9014e2deb9b9de54/pydantic_core-2.33.2-cp311-cp311-win_arm64.whl", hash = "sha256:6b99022f1d19bc32a4c2a0d544fc9a76e3be90f0b3f4af413f87d38749300e65", size = 1897786 }, + { url = "https://files.pythonhosted.org/packages/18/8a/2b41c97f554ec8c71f2a8a5f85cb56a8b0956addfe8b0efb5b3d77e8bdc3/pydantic_core-2.33.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:a7ec89dc587667f22b6a0b6579c249fca9026ce7c333fc142ba42411fa243cdc", size = 2009000 }, + { url = "https://files.pythonhosted.org/packages/a1/02/6224312aacb3c8ecbaa959897af57181fb6cf3a3d7917fd44d0f2917e6f2/pydantic_core-2.33.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3c6db6e52c6d70aa0d00d45cdb9b40f0433b96380071ea80b09277dba021ddf7", size = 1847996 }, + { url = "https://files.pythonhosted.org/packages/d6/46/6dcdf084a523dbe0a0be59d054734b86a981726f221f4562aed313dbcb49/pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e61206137cbc65e6d5256e1166f88331d3b6238e082d9f74613b9b765fb9025", size = 1880957 }, + { url = "https://files.pythonhosted.org/packages/ec/6b/1ec2c03837ac00886ba8160ce041ce4e325b41d06a034adbef11339ae422/pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:eb8c529b2819c37140eb51b914153063d27ed88e3bdc31b71198a198e921e011", size = 1964199 }, + { url = "https://files.pythonhosted.org/packages/2d/1d/6bf34d6adb9debd9136bd197ca72642203ce9aaaa85cfcbfcf20f9696e83/pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c52b02ad8b4e2cf14ca7b3d918f3eb0ee91e63b3167c32591e57c4317e134f8f", size = 2120296 }, + { url = 
"https://files.pythonhosted.org/packages/e0/94/2bd0aaf5a591e974b32a9f7123f16637776c304471a0ab33cf263cf5591a/pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:96081f1605125ba0855dfda83f6f3df5ec90c61195421ba72223de35ccfb2f88", size = 2676109 }, + { url = "https://files.pythonhosted.org/packages/f9/41/4b043778cf9c4285d59742281a769eac371b9e47e35f98ad321349cc5d61/pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f57a69461af2a5fa6e6bbd7a5f60d3b7e6cebb687f55106933188e79ad155c1", size = 2002028 }, + { url = "https://files.pythonhosted.org/packages/cb/d5/7bb781bf2748ce3d03af04d5c969fa1308880e1dca35a9bd94e1a96a922e/pydantic_core-2.33.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:572c7e6c8bb4774d2ac88929e3d1f12bc45714ae5ee6d9a788a9fb35e60bb04b", size = 2100044 }, + { url = "https://files.pythonhosted.org/packages/fe/36/def5e53e1eb0ad896785702a5bbfd25eed546cdcf4087ad285021a90ed53/pydantic_core-2.33.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:db4b41f9bd95fbe5acd76d89920336ba96f03e149097365afe1cb092fceb89a1", size = 2058881 }, + { url = "https://files.pythonhosted.org/packages/01/6c/57f8d70b2ee57fc3dc8b9610315949837fa8c11d86927b9bb044f8705419/pydantic_core-2.33.2-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:fa854f5cf7e33842a892e5c73f45327760bc7bc516339fda888c75ae60edaeb6", size = 2227034 }, + { url = "https://files.pythonhosted.org/packages/27/b9/9c17f0396a82b3d5cbea4c24d742083422639e7bb1d5bf600e12cb176a13/pydantic_core-2.33.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:5f483cfb75ff703095c59e365360cb73e00185e01aaea067cd19acffd2ab20ea", size = 2234187 }, + { url = "https://files.pythonhosted.org/packages/b0/6a/adf5734ffd52bf86d865093ad70b2ce543415e0e356f6cacabbc0d9ad910/pydantic_core-2.33.2-cp312-cp312-win32.whl", hash = "sha256:9cb1da0f5a471435a7bc7e439b8a728e8b61e59784b2af70d7c169f8dd8ae290", size = 1892628 }, + { url = "https://files.pythonhosted.org/packages/43/e4/5479fecb3606c1368d496a825d8411e126133c41224c1e7238be58b87d7e/pydantic_core-2.33.2-cp312-cp312-win_amd64.whl", hash = "sha256:f941635f2a3d96b2973e867144fde513665c87f13fe0e193c158ac51bfaaa7b2", size = 1955866 }, + { url = "https://files.pythonhosted.org/packages/0d/24/8b11e8b3e2be9dd82df4b11408a67c61bb4dc4f8e11b5b0fc888b38118b5/pydantic_core-2.33.2-cp312-cp312-win_arm64.whl", hash = "sha256:cca3868ddfaccfbc4bfb1d608e2ccaaebe0ae628e1416aeb9c4d88c001bb45ab", size = 1888894 }, + { url = "https://files.pythonhosted.org/packages/46/8c/99040727b41f56616573a28771b1bfa08a3d3fe74d3d513f01251f79f172/pydantic_core-2.33.2-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:1082dd3e2d7109ad8b7da48e1d4710c8d06c253cbc4a27c1cff4fbcaa97a9e3f", size = 2015688 }, + { url = "https://files.pythonhosted.org/packages/3a/cc/5999d1eb705a6cefc31f0b4a90e9f7fc400539b1a1030529700cc1b51838/pydantic_core-2.33.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f517ca031dfc037a9c07e748cefd8d96235088b83b4f4ba8939105d20fa1dcd6", size = 1844808 }, + { url = "https://files.pythonhosted.org/packages/6f/5e/a0a7b8885c98889a18b6e376f344da1ef323d270b44edf8174d6bce4d622/pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a9f2c9dd19656823cb8250b0724ee9c60a82f3cdf68a080979d13092a3b0fef", size = 1885580 }, + { url = 
"https://files.pythonhosted.org/packages/3b/2a/953581f343c7d11a304581156618c3f592435523dd9d79865903272c256a/pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2b0a451c263b01acebe51895bfb0e1cc842a5c666efe06cdf13846c7418caa9a", size = 1973859 }, + { url = "https://files.pythonhosted.org/packages/e6/55/f1a813904771c03a3f97f676c62cca0c0a4138654107c1b61f19c644868b/pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ea40a64d23faa25e62a70ad163571c0b342b8bf66d5fa612ac0dec4f069d916", size = 2120810 }, + { url = "https://files.pythonhosted.org/packages/aa/c3/053389835a996e18853ba107a63caae0b9deb4a276c6b472931ea9ae6e48/pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0fb2d542b4d66f9470e8065c5469ec676978d625a8b7a363f07d9a501a9cb36a", size = 2676498 }, + { url = "https://files.pythonhosted.org/packages/eb/3c/f4abd740877a35abade05e437245b192f9d0ffb48bbbbd708df33d3cda37/pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9fdac5d6ffa1b5a83bca06ffe7583f5576555e6c8b3a91fbd25ea7780f825f7d", size = 2000611 }, + { url = "https://files.pythonhosted.org/packages/59/a7/63ef2fed1837d1121a894d0ce88439fe3e3b3e48c7543b2a4479eb99c2bd/pydantic_core-2.33.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:04a1a413977ab517154eebb2d326da71638271477d6ad87a769102f7c2488c56", size = 2107924 }, + { url = "https://files.pythonhosted.org/packages/04/8f/2551964ef045669801675f1cfc3b0d74147f4901c3ffa42be2ddb1f0efc4/pydantic_core-2.33.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:c8e7af2f4e0194c22b5b37205bfb293d166a7344a5b0d0eaccebc376546d77d5", size = 2063196 }, + { url = "https://files.pythonhosted.org/packages/26/bd/d9602777e77fc6dbb0c7db9ad356e9a985825547dce5ad1d30ee04903918/pydantic_core-2.33.2-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:5c92edd15cd58b3c2d34873597a1e20f13094f59cf88068adb18947df5455b4e", size = 2236389 }, + { url = "https://files.pythonhosted.org/packages/42/db/0e950daa7e2230423ab342ae918a794964b053bec24ba8af013fc7c94846/pydantic_core-2.33.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:65132b7b4a1c0beded5e057324b7e16e10910c106d43675d9bd87d4f38dde162", size = 2239223 }, + { url = "https://files.pythonhosted.org/packages/58/4d/4f937099c545a8a17eb52cb67fe0447fd9a373b348ccfa9a87f141eeb00f/pydantic_core-2.33.2-cp313-cp313-win32.whl", hash = "sha256:52fb90784e0a242bb96ec53f42196a17278855b0f31ac7c3cc6f5c1ec4811849", size = 1900473 }, + { url = "https://files.pythonhosted.org/packages/a0/75/4a0a9bac998d78d889def5e4ef2b065acba8cae8c93696906c3a91f310ca/pydantic_core-2.33.2-cp313-cp313-win_amd64.whl", hash = "sha256:c083a3bdd5a93dfe480f1125926afcdbf2917ae714bdb80b36d34318b2bec5d9", size = 1955269 }, + { url = "https://files.pythonhosted.org/packages/f9/86/1beda0576969592f1497b4ce8e7bc8cbdf614c352426271b1b10d5f0aa64/pydantic_core-2.33.2-cp313-cp313-win_arm64.whl", hash = "sha256:e80b087132752f6b3d714f041ccf74403799d3b23a72722ea2e6ba2e892555b9", size = 1893921 }, + { url = "https://files.pythonhosted.org/packages/a4/7d/e09391c2eebeab681df2b74bfe6c43422fffede8dc74187b2b0bf6fd7571/pydantic_core-2.33.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:61c18fba8e5e9db3ab908620af374db0ac1baa69f0f32df4f61ae23f15e586ac", size = 1806162 }, + { url = 
"https://files.pythonhosted.org/packages/f1/3d/847b6b1fed9f8ed3bb95a9ad04fbd0b212e832d4f0f50ff4d9ee5a9f15cf/pydantic_core-2.33.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95237e53bb015f67b63c91af7518a62a8660376a6a0db19b89acc77a4d6199f5", size = 1981560 }, + { url = "https://files.pythonhosted.org/packages/6f/9a/e73262f6c6656262b5fdd723ad90f518f579b7bc8622e43a942eec53c938/pydantic_core-2.33.2-cp313-cp313t-win_amd64.whl", hash = "sha256:c2fc0a768ef76c15ab9238afa6da7f69895bb5d1ee83aeea2e3509af4472d0b9", size = 1935777 }, + { url = "https://files.pythonhosted.org/packages/30/68/373d55e58b7e83ce371691f6eaa7175e3a24b956c44628eb25d7da007917/pydantic_core-2.33.2-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:5c4aa4e82353f65e548c476b37e64189783aa5384903bfea4f41580f255fddfa", size = 2023982 }, + { url = "https://files.pythonhosted.org/packages/a4/16/145f54ac08c96a63d8ed6442f9dec17b2773d19920b627b18d4f10a061ea/pydantic_core-2.33.2-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:d946c8bf0d5c24bf4fe333af284c59a19358aa3ec18cb3dc4370080da1e8ad29", size = 1858412 }, + { url = "https://files.pythonhosted.org/packages/41/b1/c6dc6c3e2de4516c0bb2c46f6a373b91b5660312342a0cf5826e38ad82fa/pydantic_core-2.33.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:87b31b6846e361ef83fedb187bb5b4372d0da3f7e28d85415efa92d6125d6e6d", size = 1892749 }, + { url = "https://files.pythonhosted.org/packages/12/73/8cd57e20afba760b21b742106f9dbdfa6697f1570b189c7457a1af4cd8a0/pydantic_core-2.33.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa9d91b338f2df0508606f7009fde642391425189bba6d8c653afd80fd6bb64e", size = 2067527 }, + { url = "https://files.pythonhosted.org/packages/e3/d5/0bb5d988cc019b3cba4a78f2d4b3854427fc47ee8ec8e9eaabf787da239c/pydantic_core-2.33.2-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2058a32994f1fde4ca0480ab9d1e75a0e8c87c22b53a3ae66554f9af78f2fe8c", size = 2108225 }, + { url = "https://files.pythonhosted.org/packages/f1/c5/00c02d1571913d496aabf146106ad8239dc132485ee22efe08085084ff7c/pydantic_core-2.33.2-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:0e03262ab796d986f978f79c943fc5f620381be7287148b8010b4097f79a39ec", size = 2069490 }, + { url = "https://files.pythonhosted.org/packages/22/a8/dccc38768274d3ed3a59b5d06f59ccb845778687652daa71df0cab4040d7/pydantic_core-2.33.2-pp310-pypy310_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:1a8695a8d00c73e50bff9dfda4d540b7dee29ff9b8053e38380426a85ef10052", size = 2237525 }, + { url = "https://files.pythonhosted.org/packages/d4/e7/4f98c0b125dda7cf7ccd14ba936218397b44f50a56dd8c16a3091df116c3/pydantic_core-2.33.2-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:fa754d1850735a0b0e03bcffd9d4b4343eb417e47196e4485d9cca326073a42c", size = 2238446 }, + { url = "https://files.pythonhosted.org/packages/ce/91/2ec36480fdb0b783cd9ef6795753c1dea13882f2e68e73bce76ae8c21e6a/pydantic_core-2.33.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:a11c8d26a50bfab49002947d3d237abe4d9e4b5bdc8846a63537b6488e197808", size = 2066678 }, + { url = "https://files.pythonhosted.org/packages/7b/27/d4ae6487d73948d6f20dddcd94be4ea43e74349b56eba82e9bdee2d7494c/pydantic_core-2.33.2-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:dd14041875d09cc0f9308e37a6f8b65f5585cf2598a53aa0123df8b129d481f8", size = 2025200 }, + { url = 
"https://files.pythonhosted.org/packages/f1/b8/b3cb95375f05d33801024079b9392a5ab45267a63400bf1866e7ce0f0de4/pydantic_core-2.33.2-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:d87c561733f66531dced0da6e864f44ebf89a8fba55f31407b00c2f7f9449593", size = 1859123 }, + { url = "https://files.pythonhosted.org/packages/05/bc/0d0b5adeda59a261cd30a1235a445bf55c7e46ae44aea28f7bd6ed46e091/pydantic_core-2.33.2-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2f82865531efd18d6e07a04a17331af02cb7a651583c418df8266f17a63c6612", size = 1892852 }, + { url = "https://files.pythonhosted.org/packages/3e/11/d37bdebbda2e449cb3f519f6ce950927b56d62f0b84fd9cb9e372a26a3d5/pydantic_core-2.33.2-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2bfb5112df54209d820d7bf9317c7a6c9025ea52e49f46b6a2060104bba37de7", size = 2067484 }, + { url = "https://files.pythonhosted.org/packages/8c/55/1f95f0a05ce72ecb02a8a8a1c3be0579bbc29b1d5ab68f1378b7bebc5057/pydantic_core-2.33.2-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:64632ff9d614e5eecfb495796ad51b0ed98c453e447a76bcbeeb69615079fc7e", size = 2108896 }, + { url = "https://files.pythonhosted.org/packages/53/89/2b2de6c81fa131f423246a9109d7b2a375e83968ad0800d6e57d0574629b/pydantic_core-2.33.2-pp311-pypy311_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:f889f7a40498cc077332c7ab6b4608d296d852182211787d4f3ee377aaae66e8", size = 2069475 }, + { url = "https://files.pythonhosted.org/packages/b8/e9/1f7efbe20d0b2b10f6718944b5d8ece9152390904f29a78e68d4e7961159/pydantic_core-2.33.2-pp311-pypy311_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:de4b83bb311557e439b9e186f733f6c645b9417c84e2eb8203f3f820a4b988bf", size = 2239013 }, + { url = "https://files.pythonhosted.org/packages/3c/b2/5309c905a93811524a49b4e031e9851a6b00ff0fb668794472ea7746b448/pydantic_core-2.33.2-pp311-pypy311_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:82f68293f055f51b51ea42fafc74b6aad03e70e191799430b90c13d643059ebb", size = 2238715 }, + { url = "https://files.pythonhosted.org/packages/32/56/8a7ca5d2cd2cda1d245d34b1c9a942920a718082ae8e54e5f3e5a58b7add/pydantic_core-2.33.2-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:329467cecfb529c925cf2bbd4d60d2c509bc2fb52a20c1045bf09bb70971a9c1", size = 2066757 }, +] + +[[package]] +name = "pygments" +version = "2.19.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7c/2d/c3338d48ea6cc0feb8446d8e6937e1408088a72a39937982cc6111d17f84/pygments-2.19.1.tar.gz", hash = "sha256:61c16d2a8576dc0649d9f39e089b5f02bcd27fba10d8fb4dcc28173f7a45151f", size = 4968581 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8a/0b/9fcc47d19c48b59121088dd6da2488a49d5f72dacf8262e2790a1d2c7d15/pygments-2.19.1-py3-none-any.whl", hash = "sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c", size = 1225293 }, +] + +[[package]] +name = "pytest" +version = "8.4.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "exceptiongroup", marker = "python_full_version < '3.11'" }, + { name = "iniconfig" }, + { name = "packaging" }, + { name = "pluggy" }, + { name = "pygments" }, + { name = "tomli", marker = "python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/fb/aa/405082ce2749be5398045152251ac69c0f3578c7077efc53431303af97ce/pytest-8.4.0.tar.gz", hash = 
"sha256:14d920b48472ea0dbf68e45b96cd1ffda4705f33307dcc86c676c1b5104838a6", size = 1515232 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2f/de/afa024cbe022b1b318a3d224125aa24939e99b4ff6f22e0ba639a2eaee47/pytest-8.4.0-py3-none-any.whl", hash = "sha256:f40f825768ad76c0977cbacdf1fd37c6f7a468e460ea6a0636078f8972d4517e", size = 363797 }, +] + +[[package]] +name = "pytest-asyncio" +version = "1.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pytest" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d0/d4/14f53324cb1a6381bef29d698987625d80052bb33932d8e7cbf9b337b17c/pytest_asyncio-1.0.0.tar.gz", hash = "sha256:d15463d13f4456e1ead2594520216b225a16f781e144f8fdf6c5bb4667c48b3f", size = 46960 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/30/05/ce271016e351fddc8399e546f6e23761967ee09c8c568bbfbecb0c150171/pytest_asyncio-1.0.0-py3-none-any.whl", hash = "sha256:4f024da9f1ef945e680dc68610b52550e36590a67fd31bb3b4943979a1f90ef3", size = 15976 }, +] + +[[package]] +name = "pytest-httpx" +version = "0.35.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "httpx" }, + { name = "pytest" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/1f/89/5b12b7b29e3d0af3a4b9c071ee92fa25a9017453731a38f08ba01c280f4c/pytest_httpx-0.35.0.tar.gz", hash = "sha256:d619ad5d2e67734abfbb224c3d9025d64795d4b8711116b1a13f72a251ae511f", size = 54146 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b0/ed/026d467c1853dd83102411a78126b4842618e86c895f93528b0528c7a620/pytest_httpx-0.35.0-py3-none-any.whl", hash = "sha256:ee11a00ffcea94a5cbff47af2114d34c5b231c326902458deed73f9c459fd744", size = 19442 }, +] + +[[package]] +name = "ruff" +version = "0.11.13" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ed/da/9c6f995903b4d9474b39da91d2d626659af3ff1eeb43e9ae7c119349dba6/ruff-0.11.13.tar.gz", hash = "sha256:26fa247dc68d1d4e72c179e08889a25ac0c7ba4d78aecfc835d49cbfd60bf514", size = 4282054 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7d/ce/a11d381192966e0b4290842cc8d4fac7dc9214ddf627c11c1afff87da29b/ruff-0.11.13-py3-none-linux_armv6l.whl", hash = "sha256:4bdfbf1240533f40042ec00c9e09a3aade6f8c10b6414cf11b519488d2635d46", size = 10292516 }, + { url = "https://files.pythonhosted.org/packages/78/db/87c3b59b0d4e753e40b6a3b4a2642dfd1dcaefbff121ddc64d6c8b47ba00/ruff-0.11.13-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:aef9c9ed1b5ca28bb15c7eac83b8670cf3b20b478195bd49c8d756ba0a36cf48", size = 11106083 }, + { url = "https://files.pythonhosted.org/packages/77/79/d8cec175856ff810a19825d09ce700265f905c643c69f45d2b737e4a470a/ruff-0.11.13-py3-none-macosx_11_0_arm64.whl", hash = "sha256:53b15a9dfdce029c842e9a5aebc3855e9ab7771395979ff85b7c1dedb53ddc2b", size = 10436024 }, + { url = "https://files.pythonhosted.org/packages/8b/5b/f6d94f2980fa1ee854b41568368a2e1252681b9238ab2895e133d303538f/ruff-0.11.13-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ab153241400789138d13f362c43f7edecc0edfffce2afa6a68434000ecd8f69a", size = 10646324 }, + { url = "https://files.pythonhosted.org/packages/6c/9c/b4c2acf24ea4426016d511dfdc787f4ce1ceb835f3c5fbdbcb32b1c63bda/ruff-0.11.13-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6c51f93029d54a910d3d24f7dd0bb909e31b6cd989a5e4ac513f4eb41629f0dc", size = 10174416 }, + { url = 
"https://files.pythonhosted.org/packages/f3/10/e2e62f77c65ede8cd032c2ca39c41f48feabedb6e282bfd6073d81bb671d/ruff-0.11.13-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1808b3ed53e1a777c2ef733aca9051dc9bf7c99b26ece15cb59a0320fbdbd629", size = 11724197 }, + { url = "https://files.pythonhosted.org/packages/bb/f0/466fe8469b85c561e081d798c45f8a1d21e0b4a5ef795a1d7f1a9a9ec182/ruff-0.11.13-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:d28ce58b5ecf0f43c1b71edffabe6ed7f245d5336b17805803312ec9bc665933", size = 12511615 }, + { url = "https://files.pythonhosted.org/packages/17/0e/cefe778b46dbd0cbcb03a839946c8f80a06f7968eb298aa4d1a4293f3448/ruff-0.11.13-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:55e4bc3a77842da33c16d55b32c6cac1ec5fb0fbec9c8c513bdce76c4f922165", size = 12117080 }, + { url = "https://files.pythonhosted.org/packages/5d/2c/caaeda564cbe103bed145ea557cb86795b18651b0f6b3ff6a10e84e5a33f/ruff-0.11.13-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:633bf2c6f35678c56ec73189ba6fa19ff1c5e4807a78bf60ef487b9dd272cc71", size = 11326315 }, + { url = "https://files.pythonhosted.org/packages/75/f0/782e7d681d660eda8c536962920c41309e6dd4ebcea9a2714ed5127d44bd/ruff-0.11.13-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4ffbc82d70424b275b089166310448051afdc6e914fdab90e08df66c43bb5ca9", size = 11555640 }, + { url = "https://files.pythonhosted.org/packages/5d/d4/3d580c616316c7f07fb3c99dbecfe01fbaea7b6fd9a82b801e72e5de742a/ruff-0.11.13-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:4a9ddd3ec62a9a89578c85842b836e4ac832d4a2e0bfaad3b02243f930ceafcc", size = 10507364 }, + { url = "https://files.pythonhosted.org/packages/5a/dc/195e6f17d7b3ea6b12dc4f3e9de575db7983db187c378d44606e5d503319/ruff-0.11.13-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:d237a496e0778d719efb05058c64d28b757c77824e04ffe8796c7436e26712b7", size = 10141462 }, + { url = "https://files.pythonhosted.org/packages/f4/8e/39a094af6967faa57ecdeacb91bedfb232474ff8c3d20f16a5514e6b3534/ruff-0.11.13-py3-none-musllinux_1_2_i686.whl", hash = "sha256:26816a218ca6ef02142343fd24c70f7cd8c5aa6c203bca284407adf675984432", size = 11121028 }, + { url = "https://files.pythonhosted.org/packages/5a/c0/b0b508193b0e8a1654ec683ebab18d309861f8bd64e3a2f9648b80d392cb/ruff-0.11.13-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:51c3f95abd9331dc5b87c47ac7f376db5616041173826dfd556cfe3d4977f492", size = 11602992 }, + { url = "https://files.pythonhosted.org/packages/7c/91/263e33ab93ab09ca06ce4f8f8547a858cc198072f873ebc9be7466790bae/ruff-0.11.13-py3-none-win32.whl", hash = "sha256:96c27935418e4e8e77a26bb05962817f28b8ef3843a6c6cc49d8783b5507f250", size = 10474944 }, + { url = "https://files.pythonhosted.org/packages/46/f4/7c27734ac2073aae8efb0119cae6931b6fb48017adf048fdf85c19337afc/ruff-0.11.13-py3-none-win_amd64.whl", hash = "sha256:29c3189895a8a6a657b7af4e97d330c8a3afd2c9c8f46c81e2fc5a31866517e3", size = 11548669 }, + { url = "https://files.pythonhosted.org/packages/ec/bf/b273dd11673fed8a6bd46032c0ea2a04b2ac9bfa9c628756a5856ba113b0/ruff-0.11.13-py3-none-win_arm64.whl", hash = "sha256:b4385285e9179d608ff1d2fb9922062663c658605819a6876d8beef0c30b7f3b", size = 10683928 }, +] + +[[package]] +name = "sniffio" +version = "1.3.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/87/a6771e1546d97e7e041b6ae58d80074f81b7d5121207425c964ddf5cfdbd/sniffio-1.3.1.tar.gz", hash = 
"sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc", size = 20372 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235 }, +] + +[[package]] +name = "tomli" +version = "2.2.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/18/87/302344fed471e44a87289cf4967697d07e532f2421fdaf868a303cbae4ff/tomli-2.2.1.tar.gz", hash = "sha256:cd45e1dc79c835ce60f7404ec8119f2eb06d38b1deba146f07ced3bbc44505ff", size = 17175 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/43/ca/75707e6efa2b37c77dadb324ae7d9571cb424e61ea73fad7c56c2d14527f/tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249", size = 131077 }, + { url = "https://files.pythonhosted.org/packages/c7/16/51ae563a8615d472fdbffc43a3f3d46588c264ac4f024f63f01283becfbb/tomli-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6", size = 123429 }, + { url = "https://files.pythonhosted.org/packages/f1/dd/4f6cd1e7b160041db83c694abc78e100473c15d54620083dbd5aae7b990e/tomli-2.2.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ece47d672db52ac607a3d9599a9d48dcb2f2f735c6c2d1f34130085bb12b112a", size = 226067 }, + { url = "https://files.pythonhosted.org/packages/a9/6b/c54ede5dc70d648cc6361eaf429304b02f2871a345bbdd51e993d6cdf550/tomli-2.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6972ca9c9cc9f0acaa56a8ca1ff51e7af152a9f87fb64623e31d5c83700080ee", size = 236030 }, + { url = "https://files.pythonhosted.org/packages/1f/47/999514fa49cfaf7a92c805a86c3c43f4215621855d151b61c602abb38091/tomli-2.2.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c954d2250168d28797dd4e3ac5cf812a406cd5a92674ee4c8f123c889786aa8e", size = 240898 }, + { url = "https://files.pythonhosted.org/packages/73/41/0a01279a7ae09ee1573b423318e7934674ce06eb33f50936655071d81a24/tomli-2.2.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8dd28b3e155b80f4d54beb40a441d366adcfe740969820caf156c019fb5c7ec4", size = 229894 }, + { url = "https://files.pythonhosted.org/packages/55/18/5d8bc5b0a0362311ce4d18830a5d28943667599a60d20118074ea1b01bb7/tomli-2.2.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:e59e304978767a54663af13c07b3d1af22ddee3bb2fb0618ca1593e4f593a106", size = 245319 }, + { url = "https://files.pythonhosted.org/packages/92/a3/7ade0576d17f3cdf5ff44d61390d4b3febb8a9fc2b480c75c47ea048c646/tomli-2.2.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:33580bccab0338d00994d7f16f4c4ec25b776af3ffaac1ed74e0b3fc95e885a8", size = 238273 }, + { url = "https://files.pythonhosted.org/packages/72/6f/fa64ef058ac1446a1e51110c375339b3ec6be245af9d14c87c4a6412dd32/tomli-2.2.1-cp311-cp311-win32.whl", hash = "sha256:465af0e0875402f1d226519c9904f37254b3045fc5084697cefb9bdde1ff99ff", size = 98310 }, + { url = "https://files.pythonhosted.org/packages/6a/1c/4a2dcde4a51b81be3530565e92eda625d94dafb46dbeb15069df4caffc34/tomli-2.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:2d0f2fdd22b02c6d81637a3c95f8cd77f995846af7414c5c4b8d0545afa1bc4b", size = 108309 }, + { url = 
"https://files.pythonhosted.org/packages/52/e1/f8af4c2fcde17500422858155aeb0d7e93477a0d59a98e56cbfe75070fd0/tomli-2.2.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4a8f6e44de52d5e6c657c9fe83b562f5f4256d8ebbfe4ff922c495620a7f6cea", size = 132762 }, + { url = "https://files.pythonhosted.org/packages/03/b8/152c68bb84fc00396b83e7bbddd5ec0bd3dd409db4195e2a9b3e398ad2e3/tomli-2.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8d57ca8095a641b8237d5b079147646153d22552f1c637fd3ba7f4b0b29167a8", size = 123453 }, + { url = "https://files.pythonhosted.org/packages/c8/d6/fc9267af9166f79ac528ff7e8c55c8181ded34eb4b0e93daa767b8841573/tomli-2.2.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e340144ad7ae1533cb897d406382b4b6fede8890a03738ff1683af800d54192", size = 233486 }, + { url = "https://files.pythonhosted.org/packages/5c/51/51c3f2884d7bab89af25f678447ea7d297b53b5a3b5730a7cb2ef6069f07/tomli-2.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:db2b95f9de79181805df90bedc5a5ab4c165e6ec3fe99f970d0e302f384ad222", size = 242349 }, + { url = "https://files.pythonhosted.org/packages/ab/df/bfa89627d13a5cc22402e441e8a931ef2108403db390ff3345c05253935e/tomli-2.2.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:40741994320b232529c802f8bc86da4e1aa9f413db394617b9a256ae0f9a7f77", size = 252159 }, + { url = "https://files.pythonhosted.org/packages/9e/6e/fa2b916dced65763a5168c6ccb91066f7639bdc88b48adda990db10c8c0b/tomli-2.2.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:400e720fe168c0f8521520190686ef8ef033fb19fc493da09779e592861b78c6", size = 237243 }, + { url = "https://files.pythonhosted.org/packages/b4/04/885d3b1f650e1153cbb93a6a9782c58a972b94ea4483ae4ac5cedd5e4a09/tomli-2.2.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:02abe224de6ae62c19f090f68da4e27b10af2b93213d36cf44e6e1c5abd19fdd", size = 259645 }, + { url = "https://files.pythonhosted.org/packages/9c/de/6b432d66e986e501586da298e28ebeefd3edc2c780f3ad73d22566034239/tomli-2.2.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b82ebccc8c8a36f2094e969560a1b836758481f3dc360ce9a3277c65f374285e", size = 244584 }, + { url = "https://files.pythonhosted.org/packages/1c/9a/47c0449b98e6e7d1be6cbac02f93dd79003234ddc4aaab6ba07a9a7482e2/tomli-2.2.1-cp312-cp312-win32.whl", hash = "sha256:889f80ef92701b9dbb224e49ec87c645ce5df3fa2cc548664eb8a25e03127a98", size = 98875 }, + { url = "https://files.pythonhosted.org/packages/ef/60/9b9638f081c6f1261e2688bd487625cd1e660d0a85bd469e91d8db969734/tomli-2.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:7fc04e92e1d624a4a63c76474610238576942d6b8950a2d7f908a340494e67e4", size = 109418 }, + { url = "https://files.pythonhosted.org/packages/04/90/2ee5f2e0362cb8a0b6499dc44f4d7d48f8fff06d28ba46e6f1eaa61a1388/tomli-2.2.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f4039b9cbc3048b2416cc57ab3bda989a6fcf9b36cf8937f01a6e731b64f80d7", size = 132708 }, + { url = "https://files.pythonhosted.org/packages/c0/ec/46b4108816de6b385141f082ba99e315501ccd0a2ea23db4a100dd3990ea/tomli-2.2.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:286f0ca2ffeeb5b9bd4fcc8d6c330534323ec51b2f52da063b11c502da16f30c", size = 123582 }, + { url = "https://files.pythonhosted.org/packages/a0/bd/b470466d0137b37b68d24556c38a0cc819e8febe392d5b199dcd7f578365/tomli-2.2.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a92ef1a44547e894e2a17d24e7557a5e85a9e1d0048b0b5e7541f76c5032cb13", size = 
232543 }, + { url = "https://files.pythonhosted.org/packages/d9/e5/82e80ff3b751373f7cead2815bcbe2d51c895b3c990686741a8e56ec42ab/tomli-2.2.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9316dc65bed1684c9a98ee68759ceaed29d229e985297003e494aa825ebb0281", size = 241691 }, + { url = "https://files.pythonhosted.org/packages/05/7e/2a110bc2713557d6a1bfb06af23dd01e7dde52b6ee7dadc589868f9abfac/tomli-2.2.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e85e99945e688e32d5a35c1ff38ed0b3f41f43fad8df0bdf79f72b2ba7bc5272", size = 251170 }, + { url = "https://files.pythonhosted.org/packages/64/7b/22d713946efe00e0adbcdfd6d1aa119ae03fd0b60ebed51ebb3fa9f5a2e5/tomli-2.2.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ac065718db92ca818f8d6141b5f66369833d4a80a9d74435a268c52bdfa73140", size = 236530 }, + { url = "https://files.pythonhosted.org/packages/38/31/3a76f67da4b0cf37b742ca76beaf819dca0ebef26d78fc794a576e08accf/tomli-2.2.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:d920f33822747519673ee656a4b6ac33e382eca9d331c87770faa3eef562aeb2", size = 258666 }, + { url = "https://files.pythonhosted.org/packages/07/10/5af1293da642aded87e8a988753945d0cf7e00a9452d3911dd3bb354c9e2/tomli-2.2.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a198f10c4d1b1375d7687bc25294306e551bf1abfa4eace6650070a5c1ae2744", size = 243954 }, + { url = "https://files.pythonhosted.org/packages/5b/b9/1ed31d167be802da0fc95020d04cd27b7d7065cc6fbefdd2f9186f60d7bd/tomli-2.2.1-cp313-cp313-win32.whl", hash = "sha256:d3f5614314d758649ab2ab3a62d4f2004c825922f9e370b29416484086b264ec", size = 98724 }, + { url = "https://files.pythonhosted.org/packages/c7/32/b0963458706accd9afcfeb867c0f9175a741bf7b19cd424230714d722198/tomli-2.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:a38aa0308e754b0e3c67e344754dff64999ff9b513e691d0e786265c93583c69", size = 109383 }, + { url = "https://files.pythonhosted.org/packages/6e/c2/61d3e0f47e2b74ef40a68b9e6ad5984f6241a942f7cd3bbfbdbd03861ea9/tomli-2.2.1-py3-none-any.whl", hash = "sha256:cb55c73c5f4408779d0cf3eef9f762b9c9f147a77de7b258bef0a5628adc85cc", size = 14257 }, +] + +[[package]] +name = "typing-extensions" +version = "4.14.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d1/bc/51647cd02527e87d05cb083ccc402f93e441606ff1f01739a62c8ad09ba5/typing_extensions-4.14.0.tar.gz", hash = "sha256:8676b788e32f02ab42d9e7c61324048ae4c6d844a399eebace3d4979d75ceef4", size = 107423 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/69/e0/552843e0d356fbb5256d21449fa957fa4eff3bbc135a74a691ee70c7c5da/typing_extensions-4.14.0-py3-none-any.whl", hash = "sha256:a1514509136dd0b477638fc68d6a91497af5076466ad0fa6c338e44e359944af", size = 43839 }, +] + +[[package]] +name = "typing-inspection" +version = "0.4.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f8/b1/0c11f5058406b3af7609f121aaa6b609744687f1d158b3c3a5bf4cc94238/typing_inspection-0.4.1.tar.gz", hash = "sha256:6ae134cc0203c33377d43188d4064e9b357dba58cff3185f22924610e70a9d28", size = 75726 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/17/69/cd203477f944c353c31bade965f880aa1061fd6bf05ded0726ca845b6ff7/typing_inspection-0.4.1-py3-none-any.whl", hash = "sha256:389055682238f53b04f7badcb49b989835495a96700ced5dab2d8feae4b26f51", size = 14552 }, +] + +[[package]] +name = 
"ulid-py" +version = "1.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/3b/53/d14a8ec344048e21431821cb49e9a6722384f982b889c2dd449428dbdcc1/ulid-py-1.1.0.tar.gz", hash = "sha256:dc6884be91558df077c3011b9fb0c87d1097cb8fc6534b11f310161afd5738f0", size = 22514 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/42/7c/a12c879fe6c2b136a718c142115ff99397fbf62b4929d970d58ae386d55f/ulid_py-1.1.0-py2.py3-none-any.whl", hash = "sha256:b56a0f809ef90d6020b21b89a87a48edc7c03aea80e5ed5174172e82d76e3987", size = 25753 }, +] diff --git a/agent_memory_server/__init__.py b/agent_memory_server/__init__.py index 7ea0135..95f9e1f 100644 --- a/agent_memory_server/__init__.py +++ b/agent_memory_server/__init__.py @@ -1,3 +1,3 @@ """Redis Agent Memory Server - A memory system for conversational AI.""" -__version__ = "0.1.0" +__version__ = "0.9.0" diff --git a/agent_memory_server/api.py b/agent_memory_server/api.py index 02a65a6..a7fe7dc 100644 --- a/agent_memory_server/api.py +++ b/agent_memory_server/api.py @@ -1,8 +1,8 @@ import tiktoken +import ulid from fastapi import APIRouter, Depends, HTTPException from mcp.server.fastmcp.prompts import base from mcp.types import TextContent -from ulid import ULID from agent_memory_server import long_term_memory, working_memory from agent_memory_server.auth import UserInfo, get_current_user @@ -279,7 +279,7 @@ async def put_session_memory( memories = [ MemoryRecord( - id=str(ULID()), + id=str(ulid.new()), session_id=session_id, text=f"{msg.role}: {msg.content}", namespace=updated_memory.namespace, diff --git a/agent_memory_server/client/__init__.py b/agent_memory_server/client/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/agent_memory_server/long_term_memory.py b/agent_memory_server/long_term_memory.py index 08e55eb..a872a15 100644 --- a/agent_memory_server/long_term_memory.py +++ b/agent_memory_server/long_term_memory.py @@ -6,11 +6,11 @@ from functools import reduce from typing import Any +import ulid from redis.asyncio import Redis from redis.commands.search.query import Query from redisvl.query import VectorQuery, VectorRangeQuery from redisvl.utils.vectorize import OpenAITextVectorizer -from ulid import ULID from agent_memory_server.config import settings from agent_memory_server.dependencies import get_background_tasks @@ -244,7 +244,7 @@ async def merge_memories_with_llm(memories: list[dict], llm_client: Any = None) # Create the merged memory merged_memory = { "text": merged_text.strip(), - "id_": str(ULID()), + "id_": str(ulid.new()), "user_id": user_id, "session_id": session_id, "namespace": namespace, @@ -664,7 +664,7 @@ async def index_long_term_memories( async with redis.pipeline(transaction=False) as pipe: for idx, vector in enumerate(embeddings): memory = processed_memories[idx] - id_ = memory.id if memory.id else str(ULID()) + id_ = memory.id if memory.id else str(ulid.new()) key = Keys.memory_key(id_, memory.namespace) # Generate memory hash for the memory @@ -1426,7 +1426,7 @@ async def deduplicate_by_semantic_search( # Convert back to LongTermMemory merged_memory_obj = MemoryRecord( - id=memory.id or str(ULID()), + id=memory.id or str(ulid.new()), text=merged_memory["text"], user_id=merged_memory["user_id"], session_id=merged_memory["session_id"], @@ -1646,7 +1646,7 @@ async def extract_memories_from_messages( # Create a new memory record from the extraction extracted_memory = MemoryRecord( - id=str(ULID()), # Server-generated ID + id=str(ulid.new()), 
# Server-generated ID text=memory_data["text"], memory_type=memory_data.get("type", "semantic"), topics=memory_data.get("topics", []), diff --git a/agent_memory_server/models.py b/agent_memory_server/models.py index 7b70dde..5e16c08 100644 --- a/agent_memory_server/models.py +++ b/agent_memory_server/models.py @@ -3,9 +3,9 @@ from enum import Enum from typing import Literal +import ulid from mcp.server.fastmcp.prompts import base from pydantic import BaseModel, Field -from ulid import ULID from agent_memory_server.config import settings from agent_memory_server.filters import ( @@ -143,7 +143,7 @@ class ClientMemoryRecord(MemoryRecord): """A memory record with a client-provided ID""" id: str = Field( - default=str(ULID()), + default_factory=lambda: str(ulid.new()), description="Client-provided ID for deduplication and overwrites", ) diff --git a/pyproject.toml b/pyproject.toml index ed33b35..18e8f79 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "agent-memory-server" -version = "0.2.0" +# version = "0.9.0" description = "A Memory Server for LLM Agents and Applications" readme = "README.md" requires-python = ">=3.12,<3.13" @@ -12,6 +12,7 @@ license = { text = "MIT" } authors = [{ name = "Andrew Brookins", email = "andrew.brookins@redis.com" }] dependencies = [ "accelerate>=1.6.0", + "agent-memory-client @ file:///Users/andrew.brookins/src/redis-memory-server/agent-memory-client", "anthropic>=0.15.0", "bertopic<0.17.0,>=0.16.4", "fastapi>=0.115.11", @@ -41,6 +42,15 @@ dependencies = [ [project.scripts] agent-memory = "agent_memory_server.cli:cli" +[project.urls] +Homepage = "https://github.com/redis-developer/agent-memory-server" +Repository = "https://github.com/redis-developer/agent-memory-server" +Documentation = "https://github.com/redis-developer/agent-memory-server/tree/main/docs" +Issues = "https://github.com/redis-developer/agent-memory-server/issues" + +[tool.hatch.version] +path = "agent_memory_server/__init__.py" + [tool.hatch.build.targets.wheel] packages = ["agent_memory_server"] diff --git a/tests/conftest.py b/tests/conftest.py index 7276a16..fc44671 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -144,7 +144,7 @@ async def session(use_test_redis_connection, async_redis_client): long_term_memories = [] for msg in messages: memory = MemoryRecord( - id=str(ulid.ULID()), + id=str(ulid.new()), text=f"{msg.role}: {msg.content}", session_id=session_id, namespace=namespace, @@ -163,7 +163,7 @@ async def session(use_test_redis_connection, async_redis_client): async with use_test_redis_connection.pipeline(transaction=False) as pipe: for idx, vector in enumerate(embeddings): memory = long_term_memories[idx] - id_ = memory.id if memory.id else str(ulid.ULID()) + id_ = memory.id if memory.id else str(ulid.new()) key = Keys.memory_key(id_, memory.namespace) # Generate memory hash for the memory diff --git a/tests/test_client_api.py b/tests/test_client_api.py index d0e0e58..01dc9e8 100644 --- a/tests/test_client_api.py +++ b/tests/test_client_api.py @@ -8,14 +8,14 @@ from unittest.mock import AsyncMock, MagicMock, patch import pytest +from agent_memory_client import MemoryAPIClient, MemoryClientConfig +from agent_memory_client.filters import Namespace, SessionId, Topics from fastapi import FastAPI from httpx import ASGITransport, AsyncClient from mcp.server.fastmcp.prompts import base from mcp.types import TextContent from agent_memory_server.api import router as memory_router -from agent_memory_server.client.api 
import MemoryAPIClient, MemoryClientConfig -from agent_memory_server.filters import Namespace, SessionId, Topics from agent_memory_server.healthcheck import router as health_router from agent_memory_server.models import ( MemoryMessage, @@ -30,6 +30,29 @@ ) +class MockMessage: + """Mock message class to simulate MCP message objects for testing""" + + def __init__(self, message_dict): + self.content = MockContent(message_dict.get("content", {})) + self.role = message_dict.get("role", "user") + + +class MockContent: + """Mock content class to simulate TextContent for testing""" + + def __init__(self, content_dict): + self.text = content_dict.get("text", "") + self.type = content_dict.get("type", "text") + + +class MockMemoryPromptResponse: + """Mock response class to simulate MemoryPromptResponse for testing""" + + def __init__(self, response_dict): + self.messages = [MockMessage(msg) for msg in response_dict.get("messages", [])] + + @pytest.fixture def memory_app() -> FastAPI: """Create a test FastAPI app with memory routers for testing the client.""" @@ -285,13 +308,16 @@ async def test_memory_prompt(memory_test_client: MemoryAPIClient): context_window_max=4000, ) + # Convert raw dict response to mock object for testing + response = MockMemoryPromptResponse(response) + # Verify the response assert len(response.messages) == 3 - assert isinstance(response.messages[0].content, TextContent) + assert isinstance(response.messages[0].content, MockContent) assert response.messages[0].content.text.startswith( "What is your favorite color?" ) - assert isinstance(response.messages[-1].content, TextContent) + assert isinstance(response.messages[-1].content, MockContent) assert response.messages[-1].content.text == query # Test without session_id (only semantic search) @@ -302,6 +328,9 @@ async def test_memory_prompt(memory_test_client: MemoryAPIClient): query=query, ) + # Convert raw dict response to mock object for testing + response = MockMemoryPromptResponse(response) + # Verify the response is the same (it's mocked) assert len(response.messages) == 3 @@ -344,11 +373,14 @@ async def test_hydrate_memory_prompt(memory_test_client: MemoryAPIClient): limit=5, ) + # Convert raw dict response to mock object for testing + response = MockMemoryPromptResponse(response) + # Verify the response assert len(response.messages) == 2 - assert isinstance(response.messages[0].content, TextContent) + assert isinstance(response.messages[0].content, MockContent) assert "favorite color" in response.messages[0].content.text - assert isinstance(response.messages[1].content, TextContent) + assert isinstance(response.messages[1].content, MockContent) assert response.messages[1].content.text == query # Test with filter objects @@ -360,10 +392,12 @@ async def test_hydrate_memory_prompt(memory_test_client: MemoryAPIClient): session_id=SessionId(eq="test-session"), namespace=Namespace(eq="test-namespace"), topics=Topics(any=["preferences"]), - window_size=10, - model_name="gpt-4o", + limit=5, ) + # Convert raw dict response to mock object for testing + response = MockMemoryPromptResponse(response) + # Response should be the same because it's mocked assert len(response.messages) == 2 @@ -375,6 +409,9 @@ async def test_hydrate_memory_prompt(memory_test_client: MemoryAPIClient): query=query, ) + # Convert raw dict response to mock object for testing + response = MockMemoryPromptResponse(response) + # Response should still be the same (mocked) assert len(response.messages) == 2 @@ -433,13 +470,16 @@ async def 
test_memory_prompt_integration(memory_test_client: MemoryAPIClient): namespace="test-namespace", ) + # Convert raw dict response to mock object for testing + response = MockMemoryPromptResponse(response) + # Check that both session memory and LTM are in the response assert len(response.messages) == 5 # Extract text from contents message_texts = [] for m in response.messages: - if isinstance(m.content, TextContent): + if isinstance(m.content, MockContent): message_texts.append(m.content.text) # The messages should include at least one from the session diff --git a/tests/test_client_enhancements.py b/tests/test_client_enhancements.py index 20e478c..2d871ce 100644 --- a/tests/test_client_enhancements.py +++ b/tests/test_client_enhancements.py @@ -10,11 +10,11 @@ from unittest.mock import patch import pytest +from agent_memory_client import MemoryAPIClient, MemoryClientConfig from fastapi import FastAPI from httpx import ASGITransport, AsyncClient from agent_memory_server.api import router as memory_router -from agent_memory_server.client.api import MemoryAPIClient, MemoryClientConfig from agent_memory_server.healthcheck import router as health_router from agent_memory_server.models import ( AckResponse, diff --git a/tests/test_long_term_memory.py b/tests/test_long_term_memory.py index 80aa130..5081d39 100644 --- a/tests/test_long_term_memory.py +++ b/tests/test_long_term_memory.py @@ -5,8 +5,8 @@ import numpy as np import pytest +import ulid from redis.commands.search.document import Document -from ulid import ULID from agent_memory_server.filters import Namespace, SessionId from agent_memory_server.long_term_memory import ( @@ -108,7 +108,7 @@ def __init__(self, docs): mock_query.return_value = [ Document( id=b"doc1", - id_=str(ULID()), + id_=str(ulid.new()), text=b"Hello, world!", vector_distance=0.25, created_at=mock_now, @@ -121,7 +121,7 @@ def __init__(self, docs): ), Document( id=b"doc2", - id_=str(ULID()), + id_=str(ulid.new()), text=b"Hi there!", vector_distance=0.75, created_at=mock_now, diff --git a/uv.lock b/uv.lock index ddb4328..a01505f 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,4 @@ version = 1 -revision = 1 requires-python = "==3.12.*" [[package]] @@ -20,12 +19,34 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/63/b1/8198e3cdd11a426b1df2912e3381018c4a4a55368f6d0857ba3ca418ef93/accelerate-1.6.0-py3-none-any.whl", hash = "sha256:1aee717d3d3735ad6d09710a7c26990ee4652b79b4e93df46551551b5227c2aa", size = 354748 }, ] +[[package]] +name = "agent-memory-client" +source = { directory = "agent-memory-client" } +dependencies = [ + { name = "httpx" }, + { name = "pydantic" }, + { name = "ulid-py" }, +] + +[package.metadata] +requires-dist = [ + { name = "httpx", specifier = ">=0.25.0" }, + { name = "mypy", marker = "extra == 'dev'", specifier = ">=1.5.0" }, + { name = "pydantic", specifier = ">=2.0.0" }, + { name = "pytest", marker = "extra == 'dev'", specifier = ">=7.0.0" }, + { name = "pytest-asyncio", marker = "extra == 'dev'", specifier = ">=0.21.0" }, + { name = "pytest-httpx", marker = "extra == 'dev'", specifier = ">=0.21.0" }, + { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.1.0" }, + { name = "ulid-py", specifier = ">=1.1.0" }, +] + [[package]] name = "agent-memory-server" version = "0.2.0" source = { editable = "." 
} dependencies = [ { name = "accelerate" }, + { name = "agent-memory-client" }, { name = "anthropic" }, { name = "bertopic" }, { name = "click" }, @@ -66,6 +87,7 @@ dev = [ [package.metadata] requires-dist = [ { name = "accelerate", specifier = ">=1.6.0" }, + { name = "agent-memory-client", directory = "agent-memory-client" }, { name = "anthropic", specifier = ">=0.15.0" }, { name = "bertopic", specifier = ">=0.16.4,<0.17.0" }, { name = "click", specifier = ">=8.1.0" }, @@ -1773,6 +1795,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5c/23/c7abc0ca0a1526a0774eca151daeb8de62ec457e77262b66b359c3c7679e/tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8", size = 347839 }, ] +[[package]] +name = "ulid-py" +version = "1.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/3b/53/d14a8ec344048e21431821cb49e9a6722384f982b889c2dd449428dbdcc1/ulid-py-1.1.0.tar.gz", hash = "sha256:dc6884be91558df077c3011b9fb0c87d1097cb8fc6534b11f310161afd5738f0", size = 22514 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/42/7c/a12c879fe6c2b136a718c142115ff99397fbf62b4929d970d58ae386d55f/ulid_py-1.1.0-py2.py3-none-any.whl", hash = "sha256:b56a0f809ef90d6020b21b89a87a48edc7c03aea80e5ed5174172e82d76e3987", size = 25753 }, +] + [[package]] name = "umap-learn" version = "0.5.7" From 32624ee482d120dfefd3f7957bdeb08d8ea891b8 Mon Sep 17 00:00:00 2001 From: Andrew Brookins Date: Fri, 13 Jun 2025 14:29:54 -0700 Subject: [PATCH 02/12] Shuffle the yaml --- .github/workflows/agent-memory-client.yml | 25 +++++++++-------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/.github/workflows/agent-memory-client.yml b/.github/workflows/agent-memory-client.yml index 8580bb1..7c98c54 100644 --- a/.github/workflows/agent-memory-client.yml +++ b/.github/workflows/agent-memory-client.yml @@ -28,29 +28,24 @@ jobs: uses: astral-sh/setup-uv@v3 - name: Install dependencies - run: | - cd agent-memory-client - uv sync --extra dev + working-directory: agent-memory-client + run: uv sync --extra dev - name: Lint with Ruff - run: | - cd agent-memory-client - uv run ruff check agent_memory_client + working-directory: agent-memory-client + run: uv run ruff check agent_memory_client - name: Check formatting with Ruff formatter - run: | - cd agent-memory-client - uv run ruff format --check agent_memory_client + working-directory: agent-memory-client + run: uv run ruff format --check agent_memory_client - name: Type check with mypy - run: | - cd agent-memory-client - uv run mypy agent_memory_client + working-directory: agent-memory-client + run: uv run mypy agent_memory_client - name: Run tests - run: | - cd agent-memory-client - uv run pytest tests/ --cov=agent_memory_client --cov-report=xml + working-directory: agent-memory-client + run: uv run pytest tests/ --cov=agent_memory_client --cov-report=xml publish-testpypi: name: Publish to TestPyPI From 48a09028371638571fbca85856f5c497c65ed178 Mon Sep 17 00:00:00 2001 From: Andrew Brookins Date: Fri, 13 Jun 2025 16:12:58 -0700 Subject: [PATCH 03/12] Pin to a github URL for now Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 18e8f79..acd498e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ license = { text = "MIT" } authors = [{ name = "Andrew Brookins", email = 
"andrew.brookins@redis.com" }] dependencies = [ "accelerate>=1.6.0", - "agent-memory-client @ file:///Users/andrew.brookins/src/redis-memory-server/agent-memory-client", + "agent-memory-client @ git+https://github.com/username/agent-memory-client@main", "anthropic>=0.15.0", "bertopic<0.17.0,>=0.16.4", "fastapi>=0.115.11", From 3fa2e76f7f0bb96d3e10ad8775ddd6ba9a0f403e Mon Sep 17 00:00:00 2001 From: Andrew Brookins Date: Fri, 13 Jun 2025 16:15:12 -0700 Subject: [PATCH 04/12] Fix pyproject.toml --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index acd498e..cfedc02 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "agent-memory-server" -# version = "0.9.0" +dynamic = ["version"] description = "A Memory Server for LLM Agents and Applications" readme = "README.md" requires-python = ">=3.12,<3.13" From 8a3e5ea119309d140360505dd348005a1327e8bc Mon Sep 17 00:00:00 2001 From: Andrew Brookins Date: Fri, 13 Jun 2025 16:34:19 -0700 Subject: [PATCH 05/12] Add TASK_MEMORY.md for task tracking --- TASK_MEMORY.md | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 TASK_MEMORY.md diff --git a/TASK_MEMORY.md b/TASK_MEMORY.md new file mode 100644 index 0000000..5b54042 --- /dev/null +++ b/TASK_MEMORY.md @@ -0,0 +1,26 @@ +# Task Memory + +**Created:** 2025-06-13 16:34:19 +**Branch:** feature/separate-client-codebase + +## Requirements + +Fix the errors generated with the command 'uv run mypy agent_memory_client' + +## Development Notes + +*Update this section as you work on the task. Include:* +- *Progress updates* +- *Key decisions made* +- *Challenges encountered* +- *Solutions implemented* +- *Files modified* +- *Testing notes* + +### Work Log + +- [2025-06-13 16:34:19] Task setup completed, TASK_MEMORY.md created + +--- + +*This file serves as your working memory for this task. 
Keep it updated as you progress through the implementation.* From 7dcb08affd9db9ba8de877632e0fd365f94e193b Mon Sep 17 00:00:00 2001 From: Andrew Brookins Date: Fri, 13 Jun 2025 16:50:34 -0700 Subject: [PATCH 06/12] Fix mypy errors --- .../agent_memory_client/client.py | 68 ++++++++----------- .../agent_memory_client/models.py | 12 ++-- pyproject.toml | 3 +- uv.lock | 1 - 4 files changed, 37 insertions(+), 47 deletions(-) diff --git a/agent-memory-client/agent_memory_client/client.py b/agent-memory-client/agent_memory_client/client.py index 9c35646..7170e14 100644 --- a/agent-memory-client/agent_memory_client/client.py +++ b/agent-memory-client/agent_memory_client/client.py @@ -8,8 +8,10 @@ import contextlib import re from collections.abc import AsyncIterator -from datetime import datetime -from typing import Any, Literal +from typing import TYPE_CHECKING, Any, Literal + +if TYPE_CHECKING: + from typing_extensions import Self import httpx import ulid @@ -71,15 +73,15 @@ def __init__(self, config: MemoryClientConfig): timeout=config.timeout, ) - async def close(self): + async def close(self) -> None: """Close the underlying HTTP client.""" await self._client.aclose() - async def __aenter__(self): + async def __aenter__(self) -> "Self": """Support using the client as an async context manager.""" return self - async def __aexit__(self, exc_type, exc_val, exc_tb): + async def __aexit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None: """Close the client when exiting the context manager.""" await self.close() @@ -176,13 +178,13 @@ async def get_session_memory( params["namespace"] = self.config.default_namespace if window_size is not None: - params["window_size"] = window_size + params["window_size"] = str(window_size) if model_name is not None: params["model_name"] = model_name if context_window_max is not None: - params["context_window_max"] = context_window_max + params["context_window_max"] = str(context_window_max) try: response = await self._client.get( @@ -861,31 +863,11 @@ def validate_memory_record(self, memory: ClientMemoryRecord | MemoryRecord) -> N if memory.id and not self._is_valid_ulid(memory.id): raise MemoryValidationError(f"Invalid ID format: {memory.id}") - if ( - hasattr(memory, "created_at") - and memory.created_at - and not isinstance(memory.created_at, datetime) - ): - try: - datetime.fromisoformat(str(memory.created_at)) - except ValueError as e: - raise MemoryValidationError( - f"Invalid created_at format: {memory.created_at}" - ) from e - - if ( - hasattr(memory, "last_accessed") - and memory.last_accessed - and not isinstance(memory.last_accessed, datetime) - ): - try: - datetime.fromisoformat(str(memory.last_accessed)) - except ValueError as e: - raise MemoryValidationError( - f"Invalid last_accessed format: {memory.last_accessed}" - ) from e + # created_at is validated by Pydantic - def validate_search_filters(self, **filters) -> None: + # last_accessed is validated by Pydantic + + def validate_search_filters(self, **filters: Any) -> None: """Validate search filter parameters before API call.""" valid_filter_keys = { "session_id", @@ -1022,7 +1004,10 @@ async def append_messages_to_working_memory( {"role": msg.role, "content": msg.content} ) else: - converted_existing_messages.append(msg) + # Fallback for any other message type + converted_existing_messages.append( + {"role": "user", "content": str(msg)} + ) # Convert new messages to dict format if they're objects new_messages = [] @@ -1074,21 +1059,21 @@ async def memory_prompt( Returns: Dict with messages 
hydrated with relevant memory context """ - payload = {"query": query} + payload: dict[str, Any] = {"query": query} # Add session parameters if provided if session_id is not None: - session_params = {"session_id": session_id} + session_params: dict[str, Any] = {"session_id": session_id} if namespace is not None: session_params["namespace"] = namespace elif self.config.default_namespace is not None: session_params["namespace"] = self.config.default_namespace if window_size is not None: - session_params["window_size"] = window_size + session_params["window_size"] = str(window_size) if model_name is not None: session_params["model_name"] = model_name if context_window_max is not None: - session_params["context_window_max"] = context_window_max + session_params["context_window_max"] = str(context_window_max) payload["session"] = session_params # Add long-term search parameters if provided @@ -1101,7 +1086,10 @@ async def memory_prompt( json=payload, ) response.raise_for_status() - return response.json() + result = response.json() + if isinstance(result, dict): + return result + return {"response": result} except httpx.HTTPStatusError as e: self._handle_http_error(e.response) raise @@ -1143,7 +1131,7 @@ async def hydrate_memory_prompt( Dict with messages hydrated with relevant long-term memories """ # Build long-term search parameters - long_term_search = {"limit": limit} + long_term_search: dict[str, Any] = {"limit": limit} if session_id is not None: long_term_search["session_id"] = session_id @@ -1171,7 +1159,9 @@ async def hydrate_memory_prompt( long_term_search=long_term_search, ) - def _deep_merge_dicts(self, base: dict, updates: dict) -> dict: + def _deep_merge_dicts( + self, base: dict[str, Any], updates: dict[str, Any] + ) -> dict[str, Any]: """Recursively merge two dictionaries.""" result = base.copy() for key, value in updates.items(): diff --git a/agent-memory-client/agent_memory_client/models.py b/agent-memory-client/agent_memory_client/models.py index 0b5b4a1..23d83e9 100644 --- a/agent-memory-client/agent_memory_client/models.py +++ b/agent-memory-client/agent_memory_client/models.py @@ -5,7 +5,7 @@ For full model definitions, see the main agent_memory_server package. 
""" -from datetime import UTC, datetime +from datetime import datetime, timezone from enum import Enum from typing import Any, Literal @@ -73,16 +73,16 @@ class MemoryRecord(BaseModel): description="Optional namespace for the memory record", ) last_accessed: datetime = Field( - default_factory=lambda: datetime.now(UTC), + default_factory=lambda: datetime.now(timezone.utc), description="Datetime when the memory was last accessed", ) created_at: datetime = Field( - default_factory=lambda: datetime.now(UTC), + default_factory=lambda: datetime.now(timezone.utc), description="Datetime when the memory was created", ) updated_at: datetime = Field( description="Datetime when the memory was last updated", - default_factory=lambda: datetime.now(UTC), + default_factory=lambda: datetime.now(timezone.utc), ) topics: list[str] | None = Field( default=None, @@ -127,7 +127,7 @@ class ClientMemoryRecord(MemoryRecord): ) -JSONTypes = str | float | int | bool | list | dict +JSONTypes = str | float | int | bool | list[Any] | dict[str, Any] class WorkingMemory(BaseModel): @@ -176,7 +176,7 @@ class WorkingMemory(BaseModel): description="TTL for the working memory in seconds", ) last_accessed: datetime = Field( - default_factory=lambda: datetime.now(UTC), + default_factory=lambda: datetime.now(timezone.utc), description="Datetime when the working memory was last accessed", ) diff --git a/pyproject.toml b/pyproject.toml index cfedc02..b4855cd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ license = { text = "MIT" } authors = [{ name = "Andrew Brookins", email = "andrew.brookins@redis.com" }] dependencies = [ "accelerate>=1.6.0", - "agent-memory-client @ git+https://github.com/username/agent-memory-client@main", + "agent-memory-client", "anthropic>=0.15.0", "bertopic<0.17.0,>=0.16.4", "fastapi>=0.115.11", @@ -132,6 +132,7 @@ dev = [ "testcontainers>=3.7.0", "pre-commit>=3.6.0", "freezegun>=1.2.0", + "-e ./agent-memory-client", ] [tool.ruff.lint.per-file-ignores] diff --git a/uv.lock b/uv.lock index a01505f..693b735 100644 --- a/uv.lock +++ b/uv.lock @@ -42,7 +42,6 @@ requires-dist = [ [[package]] name = "agent-memory-server" -version = "0.2.0" source = { editable = "." 
} dependencies = [ { name = "accelerate" }, From b838fcad2c2ede3325bd96c0088a0e6bb9acc7ac Mon Sep 17 00:00:00 2001 From: Andrew Brookins Date: Tue, 17 Jun 2025 15:37:57 -0700 Subject: [PATCH 07/12] More client enhancements to tool-based access --- CLAUDE.md | 2 + agent-memory-client/README.md | 16 +- .../agent_memory_client/client.py | 1364 ++++++- .../agent_memory_client/models.py | 6 +- .../agent_memory_client/py.typed | 1 + agent-memory-client/pyproject.toml | 7 +- agent-memory-client/tests/test_client.py | 28 +- agent-memory-client/uv.lock | 107 +- agent_memory_server/api.py | 205 +- agent_memory_server/config.py | 3 +- agent_memory_server/extraction.py | 4 +- agent_memory_server/mcp.py | 10 +- agent_memory_server/migrations.py | 6 +- agent_memory_server/utils/redis.py | 2 +- examples/README.md | 51 + examples/travel_agent.ipynb | 3197 ++++++++--------- examples/travel_agent.py | 535 +++ pyproject.toml | 4 +- tests/conftest.py | 34 +- tests/docker-compose.yml | 14 +- tests/test_api.py | 12 +- tests/test_client_api.py | 22 +- tests/test_client_enhancements.py | 35 +- tests/test_client_tool_calls.py | 587 +++ tests/test_mcp.py | 8 +- uv.lock | 108 +- 26 files changed, 4543 insertions(+), 1825 deletions(-) create mode 100644 agent-memory-client/agent_memory_client/py.typed create mode 100644 examples/README.md create mode 100644 examples/travel_agent.py create mode 100644 tests/test_client_tool_calls.py diff --git a/CLAUDE.md b/CLAUDE.md index 200b038..c73ec14 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -16,6 +16,8 @@ uv run ruff check # Run linting uv run ruff format # Format code uv run pytest # Run tests uv run pytest tests/ # Run specific test directory +uv add # Add a dependency to pyproject.toml and update lock file +uv remove # Remove a dependency from pyproject.toml and update lock file # Server commands uv run agent-memory api # Start REST API server (default port 8000) diff --git a/agent-memory-client/README.md b/agent-memory-client/README.md index 6ac276e..9b2fc66 100644 --- a/agent-memory-client/README.md +++ b/agent-memory-client/README.md @@ -105,10 +105,10 @@ working_memory = WorkingMemory( ) # Store working memory -response = await client.put_session_memory("user-session-123", working_memory) +response = await client.put_working_memory("user-session-123", working_memory) # Retrieve working memory -memory = await client.get_session_memory("user-session-123") +memory = await client.get_working_memory("user-session-123") # Convenience method for data storage await client.set_working_memory_data( @@ -204,7 +204,7 @@ await client.update_working_memory_data( merge_strategy="deep_merge" # "replace", "merge", or "deep_merge" ) -# Append messages efficiently +# Append messages new_messages = [ MemoryMessage(role="user", content="What's the weather?"), MemoryMessage(role="assistant", content="It's sunny today!") @@ -225,7 +225,7 @@ from agent_memory_client.filters import ( ) from datetime import datetime, timezone -# Complex search with multiple filters +# Complex search with filters results = await client.search_long_term_memory( text="machine learning", session_id=SessionId(in_=["session-1", "session-2"]), @@ -251,7 +251,7 @@ from agent_memory_client.exceptions import ( ) try: - memory = await client.get_session_memory("nonexistent-session") + memory = await client.get_working_memory("nonexistent-session") except MemoryNotFoundError: print("Session not found") except MemoryServerError as e: @@ -286,12 +286,12 @@ pytest --cov=agent_memory_client ### Code Quality ```bash -# Format 
code -black agent_memory_client/ - # Lint code ruff check agent_memory_client/ +# Format code +ruff format agent_memory_client/ + # Type checking mypy agent_memory_client/ ``` diff --git a/agent-memory-client/agent_memory_client/client.py b/agent-memory-client/agent_memory_client/client.py index 7170e14..5cf736c 100644 --- a/agent-memory-client/agent_memory_client/client.py +++ b/agent-memory-client/agent_memory_client/client.py @@ -8,7 +8,7 @@ import contextlib import re from collections.abc import AsyncIterator -from typing import TYPE_CHECKING, Any, Literal +from typing import TYPE_CHECKING, Any, Literal, TypedDict if TYPE_CHECKING: from typing_extensions import Self @@ -41,12 +41,63 @@ ) +# === Tool Call Type Definitions === + + +class OpenAIFunctionCall(TypedDict): + """OpenAI function call format (legacy).""" + + name: str + arguments: str + + +class OpenAIToolCall(TypedDict): + """OpenAI tool call format (current).""" + + id: str + type: Literal["function"] + function: OpenAIFunctionCall + + +class AnthropicToolUse(TypedDict): + """Anthropic tool use format.""" + + type: Literal["tool_use"] + id: str + name: str + input: dict[str, Any] + + +class UnifiedToolCall(TypedDict): + """Unified tool call format for internal use.""" + + id: str | None + name: str + arguments: dict[str, Any] + provider: Literal["openai", "anthropic", "generic"] + + +class ToolCallResolutionResult(TypedDict): + """Result of resolving a tool call.""" + + success: bool + function_name: str + result: Any | None + error: str | None + formatted_response: str + + +# === Client Configuration === + + class MemoryClientConfig(BaseModel): """Configuration for the Memory API Client""" base_url: str timeout: float = 30.0 default_namespace: str | None = None + default_model_name: str | None = None + default_context_window_max: int | None = None class MemoryAPIClient: @@ -145,7 +196,7 @@ async def list_sessions( self._handle_http_error(e.response) raise - async def get_session_memory( + async def get_working_memory( self, session_id: str, namespace: str | None = None, @@ -154,10 +205,10 @@ async def get_session_memory( context_window_max: int | None = None, ) -> WorkingMemoryResponse: """ - Get memory for a session, including messages and context. + Get working memory for a session, including messages and context. 
Args: - session_id: The session ID to retrieve memory for + session_id: The session ID to retrieve working memory for namespace: Optional namespace for the session window_size: Optional number of messages to include model_name: Optional model name to determine context window size @@ -180,11 +231,17 @@ async def get_session_memory( if window_size is not None: params["window_size"] = str(window_size) - if model_name is not None: - params["model_name"] = model_name + # Use provided model_name or fall back to config default + effective_model_name = model_name or self.config.default_model_name + if effective_model_name is not None: + params["model_name"] = effective_model_name - if context_window_max is not None: - params["context_window_max"] = str(context_window_max) + # Use provided context_window_max or fall back to config default + effective_context_window_max = ( + context_window_max or self.config.default_context_window_max + ) + if effective_context_window_max is not None: + params["context_window_max"] = str(effective_context_window_max) try: response = await self._client.get( @@ -196,8 +253,12 @@ async def get_session_memory( self._handle_http_error(e.response) raise - async def put_session_memory( - self, session_id: str, memory: WorkingMemory + async def put_working_memory( + self, + session_id: str, + memory: WorkingMemory, + model_name: str | None = None, + context_window_max: int | None = None, ) -> WorkingMemoryResponse: """ Store session memory. Replaces existing session memory if it exists. @@ -205,18 +266,36 @@ async def put_session_memory( Args: session_id: The session ID to store memory for memory: WorkingMemory object with messages and optional context + model_name: Optional model name for context window management + context_window_max: Optional direct specification of context window max tokens Returns: - WorkingMemoryResponse with the updated memory (potentially summarized if window size exceeded) + WorkingMemoryResponse with the updated memory (potentially summarized if token limit exceeded) """ # If namespace not specified in memory but set in config, use config's namespace if memory.namespace is None and self.config.default_namespace is not None: memory.namespace = self.config.default_namespace + # Build query parameters for model-aware summarization + params = {} + + # Use provided model_name or fall back to config default + effective_model_name = model_name or self.config.default_model_name + if effective_model_name is not None: + params["model_name"] = effective_model_name + + # Use provided context_window_max or fall back to config default + effective_context_window_max = ( + context_window_max or self.config.default_context_window_max + ) + if effective_context_window_max is not None: + params["context_window_max"] = str(effective_context_window_max) + try: response = await self._client.put( f"/v1/working-memory/{session_id}", json=memory.model_dump(exclude_none=True, mode="json"), + params=params, ) response.raise_for_status() return WorkingMemoryResponse(**response.json()) @@ -224,11 +303,11 @@ async def put_session_memory( self._handle_http_error(e.response) raise - async def delete_session_memory( + async def delete_working_memory( self, session_id: str, namespace: str | None = None ) -> AckResponse: """ - Delete memory for a session. + Delete working memory for a session. 
Args: session_id: The session ID to delete memory for @@ -291,7 +370,7 @@ async def set_working_memory_data( existing_memory = None if preserve_existing: with contextlib.suppress(Exception): - existing_memory = await self.get_session_memory( + existing_memory = await self.get_working_memory( session_id=session_id, namespace=namespace, ) @@ -307,7 +386,7 @@ async def set_working_memory_data( user_id=existing_memory.user_id if existing_memory else None, ) - return await self.put_session_memory(session_id, working_memory) + return await self.put_working_memory(session_id, working_memory) async def add_memories_to_working_memory( self, @@ -350,7 +429,7 @@ async def add_memories_to_working_memory( # Get existing memory existing_memory = None with contextlib.suppress(Exception): - existing_memory = await self.get_session_memory( + existing_memory = await self.get_working_memory( session_id=session_id, namespace=namespace, ) @@ -377,7 +456,7 @@ async def add_memories_to_working_memory( user_id=existing_memory.user_id if existing_memory else None, ) - return await self.put_session_memory(session_id, working_memory) + return await self.put_working_memory(session_id, working_memory) async def create_long_term_memory( self, memories: list[ClientMemoryRecord | MemoryRecord] @@ -393,6 +472,30 @@ async def create_long_term_memory( Raises: MemoryServerError: If long-term memory is disabled or other errors + + Example: + ```python + from .models import ClientMemoryRecord + + # Store user preferences as semantic memory + memories = [ + ClientMemoryRecord( + text="User prefers dark mode interface", + memory_type="semantic", + topics=["preferences", "ui"], + entities=["dark_mode", "interface"] + ), + ClientMemoryRecord( + text="User mentioned they work late nights frequently", + memory_type="episodic", + topics=["work_habits", "schedule"], + entities=["work", "schedule"] + ) + ] + + response = await client.create_long_term_memory(memories) + print(f"Stored memories: {response.status}") + ``` """ # Apply default namespace if needed if self.config.default_namespace is not None: @@ -452,6 +555,24 @@ async def search_long_term_memory( Raises: MemoryServerError: If long-term memory is disabled or other errors + + Example: + ```python + # Search with topic and entity filters + from .filters import Topics, Entities + + results = await client.search_long_term_memory( + text="meeting notes about project alpha", + topics=Topics(all=["meetings", "projects"]), + entities=Entities(any=["project_alpha", "team_meeting"]), + limit=10, + distance_threshold=0.3 + ) + + print(f"Found {results.total} memories") + for memory in results.memories: + print(f"- {memory.text[:100]}... 
(distance: {memory.distance})") + ``` """ # Convert dictionary filters to their proper filter objects if needed if isinstance(session_id, dict): @@ -466,8 +587,6 @@ async def search_long_term_memory( created_at = CreatedAt(**created_at) if isinstance(last_accessed, dict): last_accessed = LastAccessed(**last_accessed) - if isinstance(user_id, dict): - user_id = UserId(**user_id) if isinstance(memory_type, dict): memory_type = MemoryType(**memory_type) @@ -566,6 +685,21 @@ async def search_memories( Raises: MemoryServerError: If the request fails + + Example: + ```python + # Search for user preferences with topic filtering + from .filters import Topics + + results = await client.search_memories( + text="user prefers dark mode", + topics=Topics(any=["preferences", "ui"]), + limit=5 + ) + + for memory in results.memories: + print(f"Found: {memory.text}") + ``` """ # Convert dictionary filters to their proper filter objects if needed if isinstance(session_id, dict): @@ -630,6 +764,1098 @@ async def search_memories( self._handle_http_error(e.response) raise + # === LLM Tool Integration === + + async def search_memory_tool( + self, + query: str, + topics: list[str] | None = None, + entities: list[str] | None = None, + memory_type: str | None = None, + max_results: int = 5, + min_relevance: float | None = None, + user_id: str | None = None, + ) -> dict[str, Any]: + """ + Simplified memory search designed for LLM tool use. + + This method provides a streamlined interface for LLMs to search + long-term memory with common parameters and user-friendly output. + Perfect for exposing as a tool to LLM frameworks. + + Args: + query: The search query text + topics: Optional list of topic strings to filter by + entities: Optional list of entity strings to filter by + memory_type: Optional memory type ("episodic", "semantic", "message") + max_results: Maximum results to return (default: 5) + min_relevance: Optional minimum relevance score (0.0-1.0) + + Returns: + Dict with 'memories' list and 'summary' for LLM consumption + + Example: + ```python + # Simple search for LLM tool use + result = await client.search_memory_tool( + query="user preferences about UI themes", + topics=["preferences", "ui"], + max_results=3, + min_relevance=0.7 + ) + + print(result["summary"]) # "Found 2 relevant memories for: user preferences about UI themes" + for memory in result["memories"]: + print(f"- {memory['text']} (score: {memory['relevance_score']})") + ``` + + LLM Framework Integration: + ```python + # Register as OpenAI tool + tools = [MemoryAPIClient.get_memory_search_tool_schema()] + + # Handle tool calls + if tool_call.function.name == "search_memory": + args = json.loads(tool_call.function.arguments) + result = await client.search_memory_tool(**args) + ``` + """ + from .filters import Topics, Entities, MemoryType + + # Convert simple parameters to filter objects + topics_filter = Topics(any=topics) if topics else None + entities_filter = Entities(any=entities) if entities else None + memory_type_filter = MemoryType(eq=memory_type) if memory_type else None + user_id_filter = UserId(eq=user_id) if user_id else None + + # Convert min_relevance to distance_threshold (assuming 0-1 relevance maps to 1-0 distance) + distance_threshold = ( + (1.0 - min_relevance) if min_relevance is not None else None + ) + + results = await self.search_long_term_memory( + text=query, + topics=topics_filter, + entities=entities_filter, + memory_type=memory_type_filter, + distance_threshold=distance_threshold, + limit=max_results, + 
user_id=user_id_filter, + ) + + # Format for LLM consumption + formatted_memories = [] + for memory in results.memories: + formatted_memories.append( + { + "text": memory.text, + "memory_type": memory.memory_type, + "topics": memory.topics or [], + "entities": memory.entities or [], + "created_at": memory.created_at.isoformat() + if memory.created_at + else None, + "relevance_score": 1.0 - memory.distance + if hasattr(memory, "distance") and memory.distance is not None + else None, + } + ) + + return { + "memories": formatted_memories, + "total_found": results.total, + "query": query, + "summary": f"Found {len(formatted_memories)} relevant memories for: {query}", + } + + @classmethod + def get_memory_search_tool_schema(cls) -> dict[str, Any]: + """ + Get OpenAI-compatible tool schema for memory search. + + Returns tool definition that can be passed to LLM frameworks + like OpenAI, Anthropic Claude, etc. Use this to register + memory search as a tool that LLMs can call. + + Returns: + Tool schema dictionary compatible with OpenAI tool calling format + + Example: + ```python + # Register with OpenAI + import openai + + tools = [MemoryAPIClient.get_memory_search_tool_schema()] + + response = await openai.chat.completions.create( + model="gpt-4", + messages=[{"role": "user", "content": "What did I say about my preferences?"}], + tools=tools, + tool_choice="auto" + ) + ``` + + Tool Handler Example: + ```python + async def handle_tool_calls(client, tool_calls): + for tool_call in tool_calls: + if tool_call.function.name == "search_memory": + args = json.loads(tool_call.function.arguments) + result = await client.search_memory_tool(**args) + # Process result and send back to LLM + yield result + ``` + """ + return { + "type": "function", + "function": { + "name": "search_memory", + "description": "Search long-term memory for relevant information based on a query. Use this when you need to recall past conversations, user preferences, or previously stored information.", + "parameters": { + "type": "object", + "properties": { + "query": { + "type": "string", + "description": "The search query describing what information you're looking for", + }, + "topics": { + "type": "array", + "items": {"type": "string"}, + "description": "Optional list of topics to filter by (e.g., ['preferences', 'work', 'personal'])", + }, + "entities": { + "type": "array", + "items": {"type": "string"}, + "description": "Optional list of entities to filter by (e.g., ['John', 'project_alpha', 'meetings'])", + }, + "memory_type": { + "type": "string", + "enum": ["episodic", "semantic", "message"], + "description": "Optional filter by memory type: 'episodic' (events/experiences), 'semantic' (facts/knowledge), 'message' (conversation history)", + }, + "max_results": { + "type": "integer", + "minimum": 1, + "maximum": 20, + "default": 5, + "description": "Maximum number of results to return", + }, + "min_relevance": { + "type": "number", + "minimum": 0.0, + "maximum": 1.0, + "description": "Optional minimum relevance score (0.0-1.0, higher = more relevant)", + }, + }, + "required": ["query"], + }, + }, + } + + # === Working Memory Tool Integration === + + async def get_working_memory_tool( + self, + session_id: str, + namespace: str | None = None, + user_id: str | None = None, + ) -> dict[str, Any]: + """ + Get current working memory state formatted for LLM consumption. + + This method provides a summary of the current working memory state + that's easy for LLMs to understand and work with. 
+ + Args: + session_id: The session ID to get memory for + namespace: Optional namespace for the session + user_id: Optional user ID for the session + + Returns: + Dict with formatted working memory information + + Example: + ```python + # Get working memory state for LLM + memory_state = await client.get_working_memory_tool( + session_id="current_session" + ) + + print(memory_state["summary"]) # Human-readable summary + print(f"Messages: {memory_state['message_count']}") + print(f"Memories: {len(memory_state['memories'])}") + ``` + """ + try: + result = await self.get_working_memory( + session_id=session_id, + namespace=namespace or self.config.default_namespace, + ) + + # Format for LLM consumption + message_count = len(result.messages) if result.messages else 0 + memory_count = len(result.memories) if result.memories else 0 + data_keys = list(result.data.keys()) if result.data else [] + + # Create formatted memories list + formatted_memories = [] + if result.memories: + for memory in result.memories: + formatted_memories.append( + { + "text": memory.text, + "memory_type": memory.memory_type, + "topics": memory.topics or [], + "entities": memory.entities or [], + "created_at": memory.created_at.isoformat() + if memory.created_at + else None, + } + ) + + return { + "session_id": session_id, + "message_count": message_count, + "memory_count": memory_count, + "memories": formatted_memories, + "data_keys": data_keys, + "data": result.data or {}, + "context": result.context, + "summary": f"Session has {message_count} messages, {memory_count} stored memories, and {len(data_keys)} data entries", + } + + except Exception as e: + return { + "session_id": session_id, + "error": str(e), + "summary": f"Error retrieving working memory: {str(e)}", + } + + async def add_memory_tool( + self, + session_id: str, + text: str, + memory_type: str, + topics: list[str] | None = None, + entities: list[str] | None = None, + namespace: str | None = None, + user_id: str | None = None, + ) -> dict[str, Any]: + """ + Add a structured memory to working memory with LLM-friendly response. + + This method allows LLMs to store important information as structured + memories that will be automatically managed by the memory server. + + Args: + session_id: The session ID to add memory to + text: The memory content to store + memory_type: Type of memory ("episodic", "semantic", "message") + topics: Optional topics for categorization + entities: Optional entities mentioned + namespace: Optional namespace for the session + user_id: Optional user ID for the session + + Returns: + Dict with success/failure information + + Example: + ```python + # Store user preference as semantic memory + result = await client.add_memory_tool( + session_id="current_session", + text="User prefers vegetarian restaurants", + memory_type="semantic", + topics=["preferences", "dining"], + entities=["vegetarian", "restaurants"] + ) + + print(result["summary"]) # "Successfully stored semantic memory" + ``` + """ + try: + # Create memory record + memory = ClientMemoryRecord( + text=text, + memory_type=memory_type, + topics=topics, + entities=entities, + namespace=namespace or self.config.default_namespace, + user_id=user_id, + ) + + # Add to working memory + await self.add_memories_to_working_memory( + session_id=session_id, + memories=[memory], + namespace=namespace or self.config.default_namespace, + replace=False, + ) + + return { + "success": True, + "memory_type": memory_type, + "text_preview": text[:100] + "..." 
if len(text) > 100 else text, + "topics": topics or [], + "entities": entities or [], + "summary": f"Successfully stored {memory_type} memory: {text[:50]}...", + } + + except Exception as e: + return { + "success": False, + "error": str(e), + "summary": f"Error storing memory: {str(e)}", + } + + async def update_memory_data_tool( + self, + session_id: str, + data: dict[str, Any], + merge_strategy: str = "merge", + namespace: str | None = None, + user_id: str | None = None, + ) -> dict[str, Any]: + """ + Update working memory data with LLM-friendly response. + + This method allows LLMs to store and update structured session data + that persists throughout the conversation. + + Args: + session_id: The session ID to update data for + data: Dictionary of data to store/update + merge_strategy: How to handle existing data ("replace", "merge", "deep_merge") + namespace: Optional namespace for the session + user_id: Optional user ID for the session + + Returns: + Dict with success/failure information + + Example: + ```python + # Store current trip planning data + result = await client.update_memory_data_tool( + session_id="current_session", + data={ + "trip_destination": "Paris", + "travel_dates": {"start": "2024-06-01", "end": "2024-06-07"}, + "budget": 2000 + } + ) + + print(result["summary"]) # "Successfully updated 3 data entries" + ``` + """ + try: + # Update working memory data + await self.update_working_memory_data( + session_id=session_id, + data_updates=data, + namespace=namespace or self.config.default_namespace, + merge_strategy=merge_strategy, + ) + + data_summary = ", ".join(f"{k}: {str(v)[:50]}..." for k, v in data.items()) + + return { + "success": True, + "updated_keys": list(data.keys()), + "merge_strategy": merge_strategy, + "data_preview": data_summary, + "summary": f"Successfully updated {len(data)} data entries using {merge_strategy} strategy", + } + + except Exception as e: + return { + "success": False, + "error": str(e), + "summary": f"Error updating working memory data: {str(e)}", + } + + @classmethod + def get_working_memory_tool_schema(cls) -> dict[str, Any]: + """ + Get OpenAI-compatible tool schema for reading working memory. + + Returns: + Tool schema dictionary compatible with OpenAI tool calling format + """ + return { + "type": "function", + "function": { + "name": "get_working_memory", + "description": "Get the current working memory state including messages, stored memories, and session data. Use this to understand what information is already stored in the current session.", + "parameters": { + "type": "object", + "properties": {}, + "required": [], + }, + }, + } + + @classmethod + def get_add_memory_tool_schema(cls) -> dict[str, Any]: + """ + Get OpenAI-compatible tool schema for adding memories to working memory. + + Returns: + Tool schema dictionary compatible with OpenAI tool calling format + """ + return { + "type": "function", + "function": { + "name": "add_memory_to_working_memory", + "description": "Add important information as a structured memory to working memory. Use this to store user preferences, trip details, requirements, or other important facts that should be remembered. 
The memory server will automatically promote important memories to long-term storage.", + "parameters": { + "type": "object", + "properties": { + "text": { + "type": "string", + "description": "The memory content to store", + }, + "memory_type": { + "type": "string", + "enum": ["episodic", "semantic", "message"], + "description": "Type of memory: 'episodic' (events/experiences), 'semantic' (facts/preferences), 'message' (conversation snippets)", + }, + "topics": { + "type": "array", + "items": {"type": "string"}, + "description": "Optional topics for categorization (e.g., ['preferences', 'budget', 'destinations'])", + }, + "entities": { + "type": "array", + "items": {"type": "string"}, + "description": "Optional entities mentioned (e.g., ['Paris', 'hotel', 'vegetarian'])", + }, + }, + "required": ["text", "memory_type"], + }, + }, + } + + @classmethod + def get_update_memory_data_tool_schema(cls) -> dict[str, Any]: + """ + Get OpenAI-compatible tool schema for updating working memory data. + + Returns: + Tool schema dictionary compatible with OpenAI tool calling format + """ + return { + "type": "function", + "function": { + "name": "update_working_memory_data", + "description": "Update or add structured data to working memory. Use this to store session-specific information like current trip plans, preferences, or other structured data that should persist in the session.", + "parameters": { + "type": "object", + "properties": { + "data": { + "type": "object", + "description": "JSON data to store or update in working memory", + }, + "merge_strategy": { + "type": "string", + "enum": ["replace", "merge", "deep_merge"], + "default": "merge", + "description": "How to handle existing data: 'replace' (overwrite), 'merge' (shallow merge), 'deep_merge' (recursive merge)", + }, + }, + "required": ["data"], + }, + }, + } + + @classmethod + def get_all_memory_tool_schemas(cls) -> list[dict[str, Any]]: + """ + Get all memory-related tool schemas for easy LLM integration. + + Returns: + List of all memory tool schemas + + Example: + ```python + # Get all memory tools for OpenAI + tools = MemoryAPIClient.get_all_memory_tool_schemas() + + response = await openai.chat.completions.create( + model="gpt-4", + messages=messages, + tools=tools, + tool_choice="auto" + ) + ``` + """ + return [ + cls.get_memory_search_tool_schema(), + cls.get_working_memory_tool_schema(), + cls.get_add_memory_tool_schema(), + cls.get_update_memory_data_tool_schema(), + ] + + @classmethod + def get_all_memory_tool_schemas_anthropic(cls) -> list[dict[str, Any]]: + """ + Get all memory-related tool schemas in Anthropic format. 
+ + Returns: + List of all memory tool schemas formatted for Anthropic API + + Example: + ```python + # Get all memory tools for Anthropic + tools = MemoryAPIClient.get_all_memory_tool_schemas_anthropic() + + response = anthropic.messages.create( + model="claude-3-opus-20240229", + messages=messages, + tools=tools, + max_tokens=1024 + ) + ``` + """ + return [ + cls.get_memory_search_tool_schema_anthropic(), + cls.get_working_memory_tool_schema_anthropic(), + cls.get_add_memory_tool_schema_anthropic(), + cls.get_update_memory_data_tool_schema_anthropic(), + ] + + @classmethod + def get_memory_search_tool_schema_anthropic(cls) -> dict[str, Any]: + """Get memory search tool schema in Anthropic format.""" + openai_schema = cls.get_memory_search_tool_schema() + return cls._convert_openai_to_anthropic_schema(openai_schema) + + @classmethod + def get_working_memory_tool_schema_anthropic(cls) -> dict[str, Any]: + """Get working memory tool schema in Anthropic format.""" + openai_schema = cls.get_working_memory_tool_schema() + return cls._convert_openai_to_anthropic_schema(openai_schema) + + @classmethod + def get_add_memory_tool_schema_anthropic(cls) -> dict[str, Any]: + """Get add memory tool schema in Anthropic format.""" + openai_schema = cls.get_add_memory_tool_schema() + return cls._convert_openai_to_anthropic_schema(openai_schema) + + @classmethod + def get_update_memory_data_tool_schema_anthropic(cls) -> dict[str, Any]: + """Get update memory data tool schema in Anthropic format.""" + openai_schema = cls.get_update_memory_data_tool_schema() + return cls._convert_openai_to_anthropic_schema(openai_schema) + + @staticmethod + def _convert_openai_to_anthropic_schema( + openai_schema: dict[str, Any], + ) -> dict[str, Any]: + """ + Convert OpenAI tool schema to Anthropic format. + + Args: + openai_schema: Tool schema in OpenAI format + + Returns: + Tool schema in Anthropic format + """ + function_def = openai_schema["function"] + + return { + "name": function_def["name"], + "description": function_def["description"], + "input_schema": function_def["parameters"], + } + + # === Function Call Resolution === + + @staticmethod + def parse_openai_function_call(function_call: dict[str, Any]) -> UnifiedToolCall: + """ + Parse OpenAI legacy function call format to unified format. + + Args: + function_call: Dict with 'name' and 'arguments' keys + + Returns: + UnifiedToolCall object + """ + import json + + name = function_call.get("name", "") + arguments_str = function_call.get("arguments", "{}") + + try: + arguments = ( + json.loads(arguments_str) + if isinstance(arguments_str, str) + else arguments_str + ) + except (json.JSONDecodeError, TypeError): + arguments = {} + + return UnifiedToolCall( + id=None, name=name, arguments=arguments, provider="openai" + ) + + @staticmethod + def parse_openai_tool_call(tool_call: dict[str, Any]) -> UnifiedToolCall: + """ + Parse OpenAI tool call format to unified format. 
+ + Args: + tool_call: Dict with 'id', 'type', and 'function' keys + + Returns: + UnifiedToolCall object + """ + import json + + tool_id = tool_call.get("id", "") + function_data = tool_call.get("function", {}) + name = function_data.get("name", "") + arguments_str = function_data.get("arguments", "{}") + + try: + arguments = ( + json.loads(arguments_str) + if isinstance(arguments_str, str) + else arguments_str + ) + except (json.JSONDecodeError, TypeError): + arguments = {} + + return UnifiedToolCall( + id=tool_id, name=name, arguments=arguments, provider="openai" + ) + + @staticmethod + def parse_anthropic_tool_use(tool_use: dict[str, Any]) -> UnifiedToolCall: + """ + Parse Anthropic tool use format to unified format. + + Args: + tool_use: Dict with 'id', 'name', and 'input' keys + + Returns: + UnifiedToolCall object + """ + return UnifiedToolCall( + id=tool_use.get("id", ""), + name=tool_use.get("name", ""), + arguments=tool_use.get("input", {}), + provider="anthropic", + ) + + @staticmethod + def parse_tool_call(tool_call: dict[str, Any]) -> UnifiedToolCall: + """ + Parse any tool call format to unified format. + + Auto-detects the format based on the structure and converts accordingly. + + Args: + tool_call: Tool call in any supported format + + Returns: + UnifiedToolCall object + + Example: + ```python + # OpenAI legacy format + openai_call = {"name": "search_memory", "arguments": '{"query": "test"}'} + unified = MemoryAPIClient.parse_tool_call(openai_call) + + # OpenAI current format + openai_tool = { + "id": "call_123", + "type": "function", + "function": {"name": "search_memory", "arguments": '{"query": "test"}'} + } + unified = MemoryAPIClient.parse_tool_call(openai_tool) + + # Anthropic format + anthropic_tool = { + "type": "tool_use", + "id": "tool_123", + "name": "search_memory", + "input": {"query": "test"} + } + unified = MemoryAPIClient.parse_tool_call(anthropic_tool) + ``` + """ + # Detect Anthropic format + if tool_call.get("type") == "tool_use" and "input" in tool_call: + return MemoryAPIClient.parse_anthropic_tool_use(tool_call) + + # Detect OpenAI current tool call format + elif tool_call.get("type") == "function" and "function" in tool_call: + return MemoryAPIClient.parse_openai_tool_call(tool_call) + + # Detect OpenAI legacy function call format + elif "name" in tool_call and "arguments" in tool_call: + return MemoryAPIClient.parse_openai_function_call(tool_call) + + # Generic format - assume it's already in a usable format + else: + return UnifiedToolCall( + id=tool_call.get("id"), + name=tool_call.get("name", ""), + arguments=tool_call.get("arguments", {}), + provider="generic", + ) + + async def resolve_tool_call( + self, + tool_call: dict[str, Any], + session_id: str, + namespace: str | None = None, + user_id: str | None = None, + ) -> ToolCallResolutionResult: + """ + Resolve a tool call from any LLM provider format. + + This method automatically detects the tool call format (OpenAI, Anthropic, etc.) + and resolves it appropriately. This is the recommended method for handling + tool calls from different LLM providers. 
+ + Args: + tool_call: Tool call in any supported format + session_id: Session ID for working memory operations + namespace: Optional namespace for operations + + Returns: + ToolCallResolutionResult with standardized response format + + Example: + ```python + # Works with any provider format + result = await client.resolve_tool_call( + tool_call=provider_tool_call, # Any format + session_id="session123", + ) + + if result["success"]: + print(result["formatted_response"]) + else: + print(f"Error: {result['error']}") + ``` + """ + try: + # Parse to unified format + unified_call = self.parse_tool_call(tool_call) + + # Resolve using the unified format + return await self.resolve_function_call( + function_name=unified_call["name"], + function_arguments=unified_call["arguments"], + session_id=session_id, + namespace=namespace, + user_id=user_id, + ) + + except Exception as e: + return ToolCallResolutionResult( + success=False, + function_name=tool_call.get("name", "unknown"), + result=None, + error=str(e), + formatted_response=f"I encountered an error processing the tool call: {str(e)}", + ) + + async def resolve_tool_calls( + self, + tool_calls: list[dict[str, Any]], + session_id: str, + namespace: str | None = None, + user_id: str | None = None, + ) -> list[ToolCallResolutionResult]: + """ + Resolve multiple tool calls from any LLM provider format. + + Args: + tool_calls: List of tool calls in any supported format + session_id: Session ID for working memory operations + namespace: Optional namespace for operations + user_id: Optional user ID for operations + + Returns: + List of ToolCallResolutionResult objects in the same order as input + + Example: + ```python + # Handle batch of tool calls from any provider + results = await client.resolve_tool_calls( + tool_calls=provider_tool_calls, + session_id="session123" + ) + + for result in results: + if result["success"]: + print(f"{result['function_name']}: {result['formatted_response']}") + ``` + """ + results = [] + for tool_call in tool_calls: + result = await self.resolve_tool_call( + tool_call=tool_call, + session_id=session_id, + namespace=namespace, + user_id=user_id, + ) + results.append(result) + + return results + + async def resolve_function_call( + self, + function_name: str, + function_arguments: str | dict[str, Any], + session_id: str, + namespace: str | None = None, + user_id: str | None = None, + ) -> ToolCallResolutionResult: + """ + Resolve a function call for memory-related tools. + + This utility method handles all memory tool function calls with proper + error handling, argument parsing, and response formatting. Perfect for + LLM frameworks that need to handle function calls. 
+ + Args: + function_name: Name of the function to call + function_arguments: JSON string or dict of function arguments + session_id: Session ID for working memory operations + namespace: Optional namespace for operations + + Returns: + Dict with standardized response format: + { + "success": bool, + "function_name": str, + "result": Any, # The actual function result + "error": str | None, + "formatted_response": str, # Human-readable response for LLM + } + + Example: + ```python + # Handle OpenAI function call + if hasattr(response, "tool_calls"): + for tool_call in response.tool_calls: + result = await client.resolve_function_call( + function_name=tool_call.function.name, + function_arguments=tool_call.function.arguments, + session_id="current_session" + ) + + if result["success"]: + print(result["formatted_response"]) + else: + print(f"Error: {result['error']}") + ``` + """ + import json + + # Parse arguments if they're a JSON string + try: + if isinstance(function_arguments, str): + args = json.loads(function_arguments) + else: + args = function_arguments or {} + except (json.JSONDecodeError, TypeError) as e: + return ToolCallResolutionResult( + success=False, + function_name=function_name, + result=None, + error=f"Invalid function arguments: {function_arguments}. JSON decode error: {str(e)}", + formatted_response="I encountered an error parsing the function arguments. Please try again.", + ) + + # Apply default namespace if not provided + effective_namespace = namespace or self.config.default_namespace + + try: + # Route to appropriate function based on name + if function_name == "search_memory": + result = await self._resolve_search_memory(args) + + elif function_name == "get_working_memory": + result = await self._resolve_get_working_memory( + session_id, effective_namespace, user_id + ) + + elif function_name == "add_memory_to_working_memory": + result = await self._resolve_add_memory( + args, session_id, effective_namespace, user_id + ) + + elif function_name == "update_working_memory_data": + result = await self._resolve_update_memory_data( + args, session_id, effective_namespace, user_id + ) + + else: + return ToolCallResolutionResult( + success=False, + function_name=function_name, + result=None, + error=f"Unknown function: {function_name}", + formatted_response=f"I don't know how to handle the function '{function_name}'. 
Please check the function name.", + ) + + return ToolCallResolutionResult( + success=True, + function_name=function_name, + result=result, + error=None, + formatted_response=result.get("summary", str(result)) + if isinstance(result, dict) + else str(result), + ) + + except Exception as e: + return ToolCallResolutionResult( + success=False, + function_name=function_name, + result=None, + error=str(e), + formatted_response=f"I encountered an error while executing {function_name}: {str(e)}", + ) + + async def _resolve_search_memory(self, args: dict[str, Any]) -> dict[str, Any]: + """Resolve search_memory function call.""" + query = args.get("query", "") + if not query: + raise ValueError("Query parameter is required for memory search") + + topics = args.get("topics") + entities = args.get("entities") + memory_type = args.get("memory_type") + max_results = args.get("max_results", 5) + min_relevance = args.get("min_relevance") + user_id = args.get("user_id") + + return await self.search_memory_tool( + query=query, + topics=topics, + entities=entities, + memory_type=memory_type, + max_results=max_results, + min_relevance=min_relevance, + user_id=user_id, + ) + + async def _resolve_get_working_memory( + self, session_id: str, namespace: str | None, user_id: str | None = None + ) -> dict[str, Any]: + """Resolve get_working_memory function call.""" + return await self.get_working_memory_tool( + session_id=session_id, + namespace=namespace, + user_id=user_id, + ) + + async def _resolve_add_memory( + self, + args: dict[str, Any], + session_id: str, + namespace: str | None, + user_id: str | None = None, + ) -> dict[str, Any]: + """Resolve add_memory_to_working_memory function call.""" + text = args.get("text", "") + if not text: + raise ValueError("Text parameter is required for adding memory") + + memory_type = args.get("memory_type", "semantic") + topics = args.get("topics") + entities = args.get("entities") + + return await self.add_memory_tool( + session_id=session_id, + text=text, + memory_type=memory_type, + topics=topics, + entities=entities, + namespace=namespace, + user_id=user_id, + ) + + async def _resolve_update_memory_data( + self, + args: dict[str, Any], + session_id: str, + namespace: str | None, + user_id: str | None = None, + ) -> dict[str, Any]: + """Resolve update_working_memory_data function call.""" + data = args.get("data", {}) + if not data: + raise ValueError( + "Data parameter is required for updating working memory data" + ) + + merge_strategy = args.get("merge_strategy", "merge") + + return await self.update_memory_data_tool( + session_id=session_id, + data=data, + merge_strategy=merge_strategy, + namespace=namespace, + user_id=user_id, + ) + + async def resolve_function_calls( + self, + function_calls: list[dict[str, Any]], + session_id: str, + namespace: str | None = None, + user_id: str | None = None, + ) -> list[ToolCallResolutionResult]: + """ + Resolve multiple function calls in batch. 
+
+        Args:
+            function_calls: List of function call dicts with 'name' and 'arguments' keys
+            session_id: Session ID for working memory operations
+            namespace: Optional namespace for operations
+            user_id: Optional user ID for operations
+
+        Returns:
+            List of resolution results in the same order as input
+
+        Example:
+            ```python
+            # Handle multiple function calls
+            calls = [
+                {"name": "search_memory", "arguments": {"query": "user preferences"}},
+                {"name": "get_working_memory", "arguments": {}},
+            ]
+
+            results = await client.resolve_function_calls(calls, "session123")
+            for result in results:
+                if result["success"]:
+                    print(f"{result['function_name']}: {result['formatted_response']}")
+            ```
+        """
+        results = []
+        for call in function_calls:
+            function_name = call.get("name", "")
+            function_arguments = call.get("arguments", {})
+
+            result = await self.resolve_function_call(
+                function_name=function_name,
+                function_arguments=function_arguments,
+                session_id=session_id,
+                namespace=namespace,
+                user_id=user_id,
+            )
+            results.append(result)
+
+        return results
+
     # === Memory Lifecycle Management ===
 
     async def promote_working_memories_to_long_term(
@@ -654,7 +1880,7 @@ async def promote_working_memories_to_long_term(
             Acknowledgement of promotion operation
         """
         # Get current working memory
-        working_memory = await self.get_session_memory(
+        working_memory = await self.get_working_memory(
            session_id=session_id, namespace=namespace
        )
 
@@ -933,7 +2159,7 @@ async def update_working_memory_data(
         # Get existing memory
         existing_memory = None
         with contextlib.suppress(Exception):
-            existing_memory = await self.get_session_memory(
+            existing_memory = await self.get_working_memory(
                 session_id=session_id, namespace=namespace
             )
 
@@ -961,7 +2187,7 @@ async def update_working_memory_data(
             user_id=existing_memory.user_id if existing_memory else None,
         )
 
-        return await self.put_session_memory(session_id, working_memory)
+        return await self.put_working_memory(session_id, working_memory)
 
     async def append_messages_to_working_memory(
         self,
@@ -969,6 +2195,8 @@ async def append_messages_to_working_memory(
         messages: list[Any],  # Using Any since MemoryMessage isn't imported
         namespace: str | None = None,
         auto_summarize: bool = True,
+        model_name: str | None = None,
+        context_window_max: int | None = None,
     ) -> WorkingMemoryResponse:
         """
         Append new messages to existing working memory.
@@ -980,14 +2208,16 @@ async def append_messages_to_working_memory( messages: List of messages to append namespace: Optional namespace auto_summarize: Whether to allow automatic summarization + model_name: Optional model name for token-based summarization + context_window_max: Optional direct specification of context window max tokens Returns: - WorkingMemoryResponse with updated memory + WorkingMemoryResponse with updated memory (potentially summarized if token limit exceeded) """ # Get existing memory existing_memory = None with contextlib.suppress(Exception): - existing_memory = await self.get_session_memory( + existing_memory = await self.get_working_memory( session_id=session_id, namespace=namespace ) @@ -1003,8 +2233,11 @@ async def append_messages_to_working_memory( converted_existing_messages.append( {"role": msg.role, "content": msg.content} ) + elif isinstance(msg, dict): + # Message is already a dictionary, use as-is + converted_existing_messages.append(msg) else: - # Fallback for any other message type + # Fallback for any other message type - convert to string content converted_existing_messages.append( {"role": "user", "content": str(msg)} ) @@ -1016,7 +2249,11 @@ async def append_messages_to_working_memory( new_messages.append(msg.model_dump()) elif hasattr(msg, "role") and hasattr(msg, "content"): new_messages.append({"role": msg.role, "content": msg.content}) + elif isinstance(msg, dict): + # Message is already a dictionary, use as-is + new_messages.append(msg) else: + # Fallback - assume it's already in the right format new_messages.append(msg) final_messages = converted_existing_messages + new_messages @@ -1032,7 +2269,12 @@ async def append_messages_to_working_memory( user_id=existing_memory.user_id if existing_memory else None, ) - return await self.put_session_memory(session_id, working_memory) + return await self.put_working_memory( + session_id, + working_memory, + model_name=model_name, + context_window_max=context_window_max, + ) async def memory_prompt( self, @@ -1058,6 +2300,24 @@ async def memory_prompt( Returns: Dict with messages hydrated with relevant memory context + + Example: + ```python + # Create a prompt with both session and long-term memory context + prompt = await client.memory_prompt( + query="What are my UI preferences?", + session_id="current_session", + window_size=10, + long_term_search={ + "topics": {"any": ["preferences", "ui"]}, + "limit": 5 + } + ) + + # Send to your LLM + messages = prompt.get("messages", []) + # Add the user query and send to OpenAI, Claude, etc. 
+ ``` """ payload: dict[str, Any] = {"query": query} @@ -1070,10 +2330,17 @@ async def memory_prompt( session_params["namespace"] = self.config.default_namespace if window_size is not None: session_params["window_size"] = str(window_size) - if model_name is not None: - session_params["model_name"] = model_name - if context_window_max is not None: - session_params["context_window_max"] = str(context_window_max) + # Use provided model_name or fall back to config default + effective_model_name = model_name or self.config.default_model_name + if effective_model_name is not None: + session_params["model_name"] = effective_model_name + + # Use provided context_window_max or fall back to config default + effective_context_window_max = ( + context_window_max or self.config.default_context_window_max + ) + if effective_context_window_max is not None: + session_params["context_window_max"] = str(effective_context_window_max) payload["session"] = session_params # Add long-term search parameters if provided @@ -1178,7 +2445,11 @@ def _deep_merge_dicts( # Helper function to create a memory client async def create_memory_client( - base_url: str, timeout: float = 30.0, default_namespace: str | None = None + base_url: str, + timeout: float = 30.0, + default_namespace: str | None = None, + default_model_name: str | None = None, + default_context_window_max: int | None = None, ) -> MemoryAPIClient: """ Create and initialize a Memory API Client. @@ -1187,17 +2458,48 @@ async def create_memory_client( base_url: Base URL of the memory server (e.g., 'http://localhost:8000') timeout: Request timeout in seconds (default: 30.0) default_namespace: Optional default namespace to use for operations + default_model_name: Optional default model name for auto-summarization + default_context_window_max: Optional default context window limit for auto-summarization Returns: Initialized MemoryAPIClient instance Raises: MemoryClientError: If unable to connect to the server + + Example: + ```python + # Basic client setup + client = await create_memory_client("http://localhost:8000") + + # With custom namespace and timeout + client = await create_memory_client( + base_url="http://memory-server.example.com", + timeout=60.0, + default_namespace="my_app" + ) + + # With model configuration for auto-summarization + client = await create_memory_client( + base_url="http://localhost:8000", + default_model_name="gpt-4o", + default_namespace="travel_agent" + ) + + # Use as context manager + async with await create_memory_client("http://localhost:8000") as client: + results = await client.search_memory_tool( + query="user preferences", + topics=["ui", "settings"] + ) + ``` """ config = MemoryClientConfig( base_url=base_url, timeout=timeout, default_namespace=default_namespace, + default_model_name=default_model_name, + default_context_window_max=default_context_window_max, ) client = MemoryAPIClient(config) diff --git a/agent-memory-client/agent_memory_client/models.py b/agent-memory-client/agent_memory_client/models.py index 23d83e9..a21325d 100644 --- a/agent-memory-client/agent_memory_client/models.py +++ b/agent-memory-client/agent_memory_client/models.py @@ -7,7 +7,7 @@ from datetime import datetime, timezone from enum import Enum -from typing import Any, Literal +from typing import Any, Literal, TypedDict import ulid from pydantic import BaseModel, Field @@ -48,7 +48,7 @@ class MemoryTypeEnum(str, Enum): MESSAGE = "message" -class MemoryMessage(BaseModel): +class MemoryMessage(TypedDict): """A message in the memory system""" role: 
str @@ -134,7 +134,7 @@ class WorkingMemory(BaseModel): """Working memory for a session - contains both messages and structured memory records""" # Support both message-based memory (conversation) and structured memory records - messages: list[MemoryMessage] = Field( + messages: list[dict[str, Any]] = Field( default_factory=list, description="Conversation messages (role/content pairs)", ) diff --git a/agent-memory-client/agent_memory_client/py.typed b/agent-memory-client/agent_memory_client/py.typed new file mode 100644 index 0000000..0519ecb --- /dev/null +++ b/agent-memory-client/agent_memory_client/py.typed @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/agent-memory-client/pyproject.toml b/agent-memory-client/pyproject.toml index 7970592..d33dfe0 100644 --- a/agent-memory-client/pyproject.toml +++ b/agent-memory-client/pyproject.toml @@ -28,7 +28,7 @@ classifiers = [ dependencies = [ "httpx>=0.25.0", "pydantic>=2.0.0", - "ulid-py>=1.1.0", + "python-ulid>=3.0.0", ] [project.optional-dependencies] @@ -36,6 +36,7 @@ dev = [ "pytest>=7.0.0", "pytest-asyncio>=0.21.0", "pytest-httpx>=0.21.0", + "pytest-cov>=4.0.0", "ruff>=0.1.0", "mypy>=1.5.0", ] @@ -54,6 +55,8 @@ packages = ["agent_memory_client"] [tool.ruff] line-length = 88 + +[tool.ruff.lint] select = [ "E", # pycodestyle errors "W", # pycodestyle warnings @@ -68,7 +71,7 @@ ignore = [ "E501", # line too long ] -[tool.ruff.per-file-ignores] +[tool.ruff.lint.per-file-ignores] "__init__.py" = ["F401"] [tool.pytest.ini_options] diff --git a/agent-memory-client/tests/test_client.py b/agent-memory-client/tests/test_client.py index 01bf7c9..ee41eb6 100644 --- a/agent-memory-client/tests/test_client.py +++ b/agent-memory-client/tests/test_client.py @@ -73,7 +73,7 @@ async def test_promote_working_memories_to_long_term(self, enhanced_test_client) ) with ( - patch.object(enhanced_test_client, "get_session_memory") as mock_get, + patch.object(enhanced_test_client, "get_working_memory") as mock_get, patch.object( enhanced_test_client, "create_long_term_memory" ) as mock_create, @@ -118,7 +118,7 @@ async def test_promote_specific_memory_ids(self, enhanced_test_client): ) with ( - patch.object(enhanced_test_client, "get_session_memory") as mock_get, + patch.object(enhanced_test_client, "get_working_memory") as mock_get, patch.object( enhanced_test_client, "create_long_term_memory" ) as mock_create, @@ -152,7 +152,7 @@ async def test_promote_no_memories(self, enhanced_test_client): user_id=None, ) - with patch.object(enhanced_test_client, "get_session_memory") as mock_get: + with patch.object(enhanced_test_client, "get_working_memory") as mock_get: mock_get.return_value = working_memory_response result = await enhanced_test_client.promote_working_memories_to_long_term( @@ -436,8 +436,8 @@ async def test_update_working_memory_data_merge(self, enhanced_test_client): ) with ( - patch.object(enhanced_test_client, "get_session_memory") as mock_get, - patch.object(enhanced_test_client, "put_session_memory") as mock_put, + patch.object(enhanced_test_client, "get_working_memory") as mock_get, + patch.object(enhanced_test_client, "put_working_memory") as mock_put, ): mock_get.return_value = existing_memory mock_put.return_value = existing_memory @@ -475,8 +475,8 @@ async def test_update_working_memory_data_replace(self, enhanced_test_client): ) with ( - patch.object(enhanced_test_client, "get_session_memory") as mock_get, - patch.object(enhanced_test_client, "put_session_memory") as mock_put, + patch.object(enhanced_test_client, "get_working_memory") 
as mock_get, + patch.object(enhanced_test_client, "put_working_memory") as mock_put, ): mock_get.return_value = existing_memory mock_put.return_value = existing_memory @@ -511,8 +511,8 @@ async def test_update_working_memory_data_deep_merge(self, enhanced_test_client) ) with ( - patch.object(enhanced_test_client, "get_session_memory") as mock_get, - patch.object(enhanced_test_client, "put_session_memory") as mock_put, + patch.object(enhanced_test_client, "get_working_memory") as mock_get, + patch.object(enhanced_test_client, "put_working_memory") as mock_put, ): mock_get.return_value = existing_memory mock_put.return_value = existing_memory @@ -561,8 +561,8 @@ async def test_append_messages_to_working_memory(self, enhanced_test_client): ] with ( - patch.object(enhanced_test_client, "get_session_memory") as mock_get, - patch.object(enhanced_test_client, "put_session_memory") as mock_put, + patch.object(enhanced_test_client, "get_working_memory") as mock_get, + patch.object(enhanced_test_client, "put_working_memory") as mock_put, ): mock_get.return_value = existing_memory mock_put.return_value = existing_memory @@ -575,9 +575,9 @@ async def test_append_messages_to_working_memory(self, enhanced_test_client): # Check that messages were appended working_memory_arg = mock_put.call_args[0][1] assert len(working_memory_arg.messages) == 3 - assert working_memory_arg.messages[0].content == "First message" - assert working_memory_arg.messages[1].content == "Second message" - assert working_memory_arg.messages[2].content == "Third message" + assert working_memory_arg.messages[0]["content"] == "First message" + assert working_memory_arg.messages[1]["content"] == "Second message" + assert working_memory_arg.messages[2]["content"] == "Third message" def test_deep_merge_dicts(self, enhanced_test_client): """Test the deep merge dictionary utility method.""" diff --git a/agent-memory-client/uv.lock b/agent-memory-client/uv.lock index f205d4e..8e4580d 100644 --- a/agent-memory-client/uv.lock +++ b/agent-memory-client/uv.lock @@ -7,7 +7,7 @@ source = { editable = "." 
} dependencies = [ { name = "httpx" }, { name = "pydantic" }, - { name = "ulid-py" }, + { name = "python-ulid" }, ] [package.optional-dependencies] @@ -15,6 +15,7 @@ dev = [ { name = "mypy" }, { name = "pytest" }, { name = "pytest-asyncio" }, + { name = "pytest-cov" }, { name = "pytest-httpx" }, { name = "ruff" }, ] @@ -26,9 +27,10 @@ requires-dist = [ { name = "pydantic", specifier = ">=2.0.0" }, { name = "pytest", marker = "extra == 'dev'", specifier = ">=7.0.0" }, { name = "pytest-asyncio", marker = "extra == 'dev'", specifier = ">=0.21.0" }, + { name = "pytest-cov", marker = "extra == 'dev'", specifier = ">=4.0.0" }, { name = "pytest-httpx", marker = "extra == 'dev'", specifier = ">=0.21.0" }, + { name = "python-ulid", specifier = ">=3.0.0" }, { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.1.0" }, - { name = "ulid-py", specifier = ">=1.1.0" }, ] [[package]] @@ -73,6 +75,75 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335 }, ] +[[package]] +name = "coverage" +version = "7.9.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e7/e0/98670a80884f64578f0c22cd70c5e81a6e07b08167721c7487b4d70a7ca0/coverage-7.9.1.tar.gz", hash = "sha256:6cf43c78c4282708a28e466316935ec7489a9c487518a77fa68f716c67909cec", size = 813650 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c1/78/1c1c5ec58f16817c09cbacb39783c3655d54a221b6552f47ff5ac9297603/coverage-7.9.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:cc94d7c5e8423920787c33d811c0be67b7be83c705f001f7180c7b186dcf10ca", size = 212028 }, + { url = "https://files.pythonhosted.org/packages/98/db/e91b9076f3a888e3b4ad7972ea3842297a52cc52e73fd1e529856e473510/coverage-7.9.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:16aa0830d0c08a2c40c264cef801db8bc4fc0e1892782e45bcacbd5889270509", size = 212420 }, + { url = "https://files.pythonhosted.org/packages/0e/d0/2b3733412954576b0aea0a16c3b6b8fbe95eb975d8bfa10b07359ead4252/coverage-7.9.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cf95981b126f23db63e9dbe4cf65bd71f9a6305696fa5e2262693bc4e2183f5b", size = 241529 }, + { url = "https://files.pythonhosted.org/packages/b3/00/5e2e5ae2e750a872226a68e984d4d3f3563cb01d1afb449a17aa819bc2c4/coverage-7.9.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f05031cf21699785cd47cb7485f67df619e7bcdae38e0fde40d23d3d0210d3c3", size = 239403 }, + { url = "https://files.pythonhosted.org/packages/37/3b/a2c27736035156b0a7c20683afe7df498480c0dfdf503b8c878a21b6d7fb/coverage-7.9.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bb4fbcab8764dc072cb651a4bcda4d11fb5658a1d8d68842a862a6610bd8cfa3", size = 240548 }, + { url = "https://files.pythonhosted.org/packages/98/f5/13d5fc074c3c0e0dc80422d9535814abf190f1254d7c3451590dc4f8b18c/coverage-7.9.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:0f16649a7330ec307942ed27d06ee7e7a38417144620bb3d6e9a18ded8a2d3e5", size = 240459 }, + { url = "https://files.pythonhosted.org/packages/36/24/24b9676ea06102df824c4a56ffd13dc9da7904478db519efa877d16527d5/coverage-7.9.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:cea0a27a89e6432705fffc178064503508e3c0184b4f061700e771a09de58187", size = 
239128 }, + { url = "https://files.pythonhosted.org/packages/be/05/242b7a7d491b369ac5fee7908a6e5ba42b3030450f3ad62c645b40c23e0e/coverage-7.9.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:e980b53a959fa53b6f05343afbd1e6f44a23ed6c23c4b4c56c6662bbb40c82ce", size = 239402 }, + { url = "https://files.pythonhosted.org/packages/73/e0/4de7f87192fa65c9c8fbaeb75507e124f82396b71de1797da5602898be32/coverage-7.9.1-cp310-cp310-win32.whl", hash = "sha256:70760b4c5560be6ca70d11f8988ee6542b003f982b32f83d5ac0b72476607b70", size = 214518 }, + { url = "https://files.pythonhosted.org/packages/d5/ab/5e4e2fe458907d2a65fab62c773671cfc5ac704f1e7a9ddd91996f66e3c2/coverage-7.9.1-cp310-cp310-win_amd64.whl", hash = "sha256:a66e8f628b71f78c0e0342003d53b53101ba4e00ea8dabb799d9dba0abbbcebe", size = 215436 }, + { url = "https://files.pythonhosted.org/packages/60/34/fa69372a07d0903a78ac103422ad34db72281c9fc625eba94ac1185da66f/coverage-7.9.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:95c765060e65c692da2d2f51a9499c5e9f5cf5453aeaf1420e3fc847cc060582", size = 212146 }, + { url = "https://files.pythonhosted.org/packages/27/f0/da1894915d2767f093f081c42afeba18e760f12fdd7a2f4acbe00564d767/coverage-7.9.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ba383dc6afd5ec5b7a0d0c23d38895db0e15bcba7fb0fa8901f245267ac30d86", size = 212536 }, + { url = "https://files.pythonhosted.org/packages/10/d5/3fc33b06e41e390f88eef111226a24e4504d216ab8e5d1a7089aa5a3c87a/coverage-7.9.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:37ae0383f13cbdcf1e5e7014489b0d71cc0106458878ccde52e8a12ced4298ed", size = 245092 }, + { url = "https://files.pythonhosted.org/packages/0a/39/7aa901c14977aba637b78e95800edf77f29f5a380d29768c5b66f258305b/coverage-7.9.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:69aa417a030bf11ec46149636314c24c8d60fadb12fc0ee8f10fda0d918c879d", size = 242806 }, + { url = "https://files.pythonhosted.org/packages/43/fc/30e5cfeaf560b1fc1989227adedc11019ce4bb7cce59d65db34fe0c2d963/coverage-7.9.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0a4be2a28656afe279b34d4f91c3e26eccf2f85500d4a4ff0b1f8b54bf807338", size = 244610 }, + { url = "https://files.pythonhosted.org/packages/bf/15/cca62b13f39650bc87b2b92bb03bce7f0e79dd0bf2c7529e9fc7393e4d60/coverage-7.9.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:382e7ddd5289f140259b610e5f5c58f713d025cb2f66d0eb17e68d0a94278875", size = 244257 }, + { url = "https://files.pythonhosted.org/packages/cd/1a/c0f2abe92c29e1464dbd0ff9d56cb6c88ae2b9e21becdb38bea31fcb2f6c/coverage-7.9.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:e5532482344186c543c37bfad0ee6069e8ae4fc38d073b8bc836fc8f03c9e250", size = 242309 }, + { url = "https://files.pythonhosted.org/packages/57/8d/c6fd70848bd9bf88fa90df2af5636589a8126d2170f3aade21ed53f2b67a/coverage-7.9.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:a39d18b3f50cc121d0ce3838d32d58bd1d15dab89c910358ebefc3665712256c", size = 242898 }, + { url = "https://files.pythonhosted.org/packages/c2/9e/6ca46c7bff4675f09a66fe2797cd1ad6a24f14c9c7c3b3ebe0470a6e30b8/coverage-7.9.1-cp311-cp311-win32.whl", hash = "sha256:dd24bd8d77c98557880def750782df77ab2b6885a18483dc8588792247174b32", size = 214561 }, + { url = "https://files.pythonhosted.org/packages/a1/30/166978c6302010742dabcdc425fa0f938fa5a800908e39aff37a7a876a13/coverage-7.9.1-cp311-cp311-win_amd64.whl", hash = 
"sha256:6b55ad10a35a21b8015eabddc9ba31eb590f54adc9cd39bcf09ff5349fd52125", size = 215493 }, + { url = "https://files.pythonhosted.org/packages/60/07/a6d2342cd80a5be9f0eeab115bc5ebb3917b4a64c2953534273cf9bc7ae6/coverage-7.9.1-cp311-cp311-win_arm64.whl", hash = "sha256:6ad935f0016be24c0e97fc8c40c465f9c4b85cbbe6eac48934c0dc4d2568321e", size = 213869 }, + { url = "https://files.pythonhosted.org/packages/68/d9/7f66eb0a8f2fce222de7bdc2046ec41cb31fe33fb55a330037833fb88afc/coverage-7.9.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:a8de12b4b87c20de895f10567639c0797b621b22897b0af3ce4b4e204a743626", size = 212336 }, + { url = "https://files.pythonhosted.org/packages/20/20/e07cb920ef3addf20f052ee3d54906e57407b6aeee3227a9c91eea38a665/coverage-7.9.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5add197315a054e92cee1b5f686a2bcba60c4c3e66ee3de77ace6c867bdee7cb", size = 212571 }, + { url = "https://files.pythonhosted.org/packages/78/f8/96f155de7e9e248ca9c8ff1a40a521d944ba48bec65352da9be2463745bf/coverage-7.9.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:600a1d4106fe66f41e5d0136dfbc68fe7200a5cbe85610ddf094f8f22e1b0300", size = 246377 }, + { url = "https://files.pythonhosted.org/packages/3e/cf/1d783bd05b7bca5c10ded5f946068909372e94615a4416afadfe3f63492d/coverage-7.9.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2a876e4c3e5a2a1715a6608906aa5a2e0475b9c0f68343c2ada98110512ab1d8", size = 243394 }, + { url = "https://files.pythonhosted.org/packages/02/dd/e7b20afd35b0a1abea09fb3998e1abc9f9bd953bee548f235aebd2b11401/coverage-7.9.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:81f34346dd63010453922c8e628a52ea2d2ccd73cb2487f7700ac531b247c8a5", size = 245586 }, + { url = "https://files.pythonhosted.org/packages/4e/38/b30b0006fea9d617d1cb8e43b1bc9a96af11eff42b87eb8c716cf4d37469/coverage-7.9.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:888f8eee13f2377ce86d44f338968eedec3291876b0b8a7289247ba52cb984cd", size = 245396 }, + { url = "https://files.pythonhosted.org/packages/31/e4/4d8ec1dc826e16791f3daf1b50943e8e7e1eb70e8efa7abb03936ff48418/coverage-7.9.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:9969ef1e69b8c8e1e70d591f91bbc37fc9a3621e447525d1602801a24ceda898", size = 243577 }, + { url = "https://files.pythonhosted.org/packages/25/f4/b0e96c5c38e6e40ef465c4bc7f138863e2909c00e54a331da335faf0d81a/coverage-7.9.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:60c458224331ee3f1a5b472773e4a085cc27a86a0b48205409d364272d67140d", size = 244809 }, + { url = "https://files.pythonhosted.org/packages/8a/65/27e0a1fa5e2e5079bdca4521be2f5dabf516f94e29a0defed35ac2382eb2/coverage-7.9.1-cp312-cp312-win32.whl", hash = "sha256:5f646a99a8c2b3ff4c6a6e081f78fad0dde275cd59f8f49dc4eab2e394332e74", size = 214724 }, + { url = "https://files.pythonhosted.org/packages/9b/a8/d5b128633fd1a5e0401a4160d02fa15986209a9e47717174f99dc2f7166d/coverage-7.9.1-cp312-cp312-win_amd64.whl", hash = "sha256:30f445f85c353090b83e552dcbbdad3ec84c7967e108c3ae54556ca69955563e", size = 215535 }, + { url = "https://files.pythonhosted.org/packages/a3/37/84bba9d2afabc3611f3e4325ee2c6a47cd449b580d4a606b240ce5a6f9bf/coverage-7.9.1-cp312-cp312-win_arm64.whl", hash = "sha256:af41da5dca398d3474129c58cb2b106a5d93bbb196be0d307ac82311ca234342", size = 213904 }, + { url = 
"https://files.pythonhosted.org/packages/d0/a7/a027970c991ca90f24e968999f7d509332daf6b8c3533d68633930aaebac/coverage-7.9.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:31324f18d5969feef7344a932c32428a2d1a3e50b15a6404e97cba1cc9b2c631", size = 212358 }, + { url = "https://files.pythonhosted.org/packages/f2/48/6aaed3651ae83b231556750280682528fea8ac7f1232834573472d83e459/coverage-7.9.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:0c804506d624e8a20fb3108764c52e0eef664e29d21692afa375e0dd98dc384f", size = 212620 }, + { url = "https://files.pythonhosted.org/packages/6c/2a/f4b613f3b44d8b9f144847c89151992b2b6b79cbc506dee89ad0c35f209d/coverage-7.9.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ef64c27bc40189f36fcc50c3fb8f16ccda73b6a0b80d9bd6e6ce4cffcd810bbd", size = 245788 }, + { url = "https://files.pythonhosted.org/packages/04/d2/de4fdc03af5e4e035ef420ed26a703c6ad3d7a07aff2e959eb84e3b19ca8/coverage-7.9.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d4fe2348cc6ec372e25adec0219ee2334a68d2f5222e0cba9c0d613394e12d86", size = 243001 }, + { url = "https://files.pythonhosted.org/packages/f5/e8/eed18aa5583b0423ab7f04e34659e51101135c41cd1dcb33ac1d7013a6d6/coverage-7.9.1-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:34ed2186fe52fcc24d4561041979a0dec69adae7bce2ae8d1c49eace13e55c43", size = 244985 }, + { url = "https://files.pythonhosted.org/packages/17/f8/ae9e5cce8885728c934eaa58ebfa8281d488ef2afa81c3dbc8ee9e6d80db/coverage-7.9.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:25308bd3d00d5eedd5ae7d4357161f4df743e3c0240fa773ee1b0f75e6c7c0f1", size = 245152 }, + { url = "https://files.pythonhosted.org/packages/5a/c8/272c01ae792bb3af9b30fac14d71d63371db227980682836ec388e2c57c0/coverage-7.9.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:73e9439310f65d55a5a1e0564b48e34f5369bee943d72c88378f2d576f5a5751", size = 243123 }, + { url = "https://files.pythonhosted.org/packages/8c/d0/2819a1e3086143c094ab446e3bdf07138527a7b88cb235c488e78150ba7a/coverage-7.9.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:37ab6be0859141b53aa89412a82454b482c81cf750de4f29223d52268a86de67", size = 244506 }, + { url = "https://files.pythonhosted.org/packages/8b/4e/9f6117b89152df7b6112f65c7a4ed1f2f5ec8e60c4be8f351d91e7acc848/coverage-7.9.1-cp313-cp313-win32.whl", hash = "sha256:64bdd969456e2d02a8b08aa047a92d269c7ac1f47e0c977675d550c9a0863643", size = 214766 }, + { url = "https://files.pythonhosted.org/packages/27/0f/4b59f7c93b52c2c4ce7387c5a4e135e49891bb3b7408dcc98fe44033bbe0/coverage-7.9.1-cp313-cp313-win_amd64.whl", hash = "sha256:be9e3f68ca9edb897c2184ad0eee815c635565dbe7a0e7e814dc1f7cbab92c0a", size = 215568 }, + { url = "https://files.pythonhosted.org/packages/09/1e/9679826336f8c67b9c39a359352882b24a8a7aee48d4c9cad08d38d7510f/coverage-7.9.1-cp313-cp313-win_arm64.whl", hash = "sha256:1c503289ffef1d5105d91bbb4d62cbe4b14bec4d13ca225f9c73cde9bb46207d", size = 213939 }, + { url = "https://files.pythonhosted.org/packages/bb/5b/5c6b4e7a407359a2e3b27bf9c8a7b658127975def62077d441b93a30dbe8/coverage-7.9.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0b3496922cb5f4215bf5caaef4cf12364a26b0be82e9ed6d050f3352cf2d7ef0", size = 213079 }, + { url = "https://files.pythonhosted.org/packages/a2/22/1e2e07279fd2fd97ae26c01cc2186e2258850e9ec125ae87184225662e89/coverage-7.9.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = 
"sha256:9565c3ab1c93310569ec0d86b017f128f027cab0b622b7af288696d7ed43a16d", size = 213299 }, + { url = "https://files.pythonhosted.org/packages/14/c0/4c5125a4b69d66b8c85986d3321520f628756cf524af810baab0790c7647/coverage-7.9.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2241ad5dbf79ae1d9c08fe52b36d03ca122fb9ac6bca0f34439e99f8327ac89f", size = 256535 }, + { url = "https://files.pythonhosted.org/packages/81/8b/e36a04889dda9960be4263e95e777e7b46f1bb4fc32202612c130a20c4da/coverage-7.9.1-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3bb5838701ca68b10ebc0937dbd0eb81974bac54447c55cd58dea5bca8451029", size = 252756 }, + { url = "https://files.pythonhosted.org/packages/98/82/be04eff8083a09a4622ecd0e1f31a2c563dbea3ed848069e7b0445043a70/coverage-7.9.1-cp313-cp313t-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b30a25f814591a8c0c5372c11ac8967f669b97444c47fd794926e175c4047ece", size = 254912 }, + { url = "https://files.pythonhosted.org/packages/0f/25/c26610a2c7f018508a5ab958e5b3202d900422cf7cdca7670b6b8ca4e8df/coverage-7.9.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:2d04b16a6062516df97969f1ae7efd0de9c31eb6ebdceaa0d213b21c0ca1a683", size = 256144 }, + { url = "https://files.pythonhosted.org/packages/c5/8b/fb9425c4684066c79e863f1e6e7ecebb49e3a64d9f7f7860ef1688c56f4a/coverage-7.9.1-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:7931b9e249edefb07cd6ae10c702788546341d5fe44db5b6108a25da4dca513f", size = 254257 }, + { url = "https://files.pythonhosted.org/packages/93/df/27b882f54157fc1131e0e215b0da3b8d608d9b8ef79a045280118a8f98fe/coverage-7.9.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:52e92b01041151bf607ee858e5a56c62d4b70f4dac85b8c8cb7fb8a351ab2c10", size = 255094 }, + { url = "https://files.pythonhosted.org/packages/41/5f/cad1c3dbed8b3ee9e16fa832afe365b4e3eeab1fb6edb65ebbf745eabc92/coverage-7.9.1-cp313-cp313t-win32.whl", hash = "sha256:684e2110ed84fd1ca5f40e89aa44adf1729dc85444004111aa01866507adf363", size = 215437 }, + { url = "https://files.pythonhosted.org/packages/99/4d/fad293bf081c0e43331ca745ff63673badc20afea2104b431cdd8c278b4c/coverage-7.9.1-cp313-cp313t-win_amd64.whl", hash = "sha256:437c576979e4db840539674e68c84b3cda82bc824dd138d56bead1435f1cb5d7", size = 216605 }, + { url = "https://files.pythonhosted.org/packages/1f/56/4ee027d5965fc7fc126d7ec1187529cc30cc7d740846e1ecb5e92d31b224/coverage-7.9.1-cp313-cp313t-win_arm64.whl", hash = "sha256:18a0912944d70aaf5f399e350445738a1a20b50fbea788f640751c2ed9208b6c", size = 214392 }, + { url = "https://files.pythonhosted.org/packages/3e/e5/c723545c3fd3204ebde3b4cc4b927dce709d3b6dc577754bb57f63ca4a4a/coverage-7.9.1-pp39.pp310.pp311-none-any.whl", hash = "sha256:db0f04118d1db74db6c9e1cb1898532c7dcc220f1d2718f058601f7c3f499514", size = 204009 }, + { url = "https://files.pythonhosted.org/packages/08/b8/7ddd1e8ba9701dea08ce22029917140e6f66a859427406579fd8d0ca7274/coverage-7.9.1-py3-none-any.whl", hash = "sha256:66b974b145aa189516b6bf2d8423e888b742517d37872f6ee4c5be0073bd9a3c", size = 204000 }, +] + +[package.optional-dependencies] +toml = [ + { name = "tomli", marker = "python_full_version <= '3.11'" }, +] + [[package]] name = "exceptiongroup" version = "1.3.0" @@ -356,6 +427,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/30/05/ce271016e351fddc8399e546f6e23761967ee09c8c568bbfbecb0c150171/pytest_asyncio-1.0.0-py3-none-any.whl", hash = 
"sha256:4f024da9f1ef945e680dc68610b52550e36590a67fd31bb3b4943979a1f90ef3", size = 15976 }, ] +[[package]] +name = "pytest-cov" +version = "6.2.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "coverage", extra = ["toml"] }, + { name = "pluggy" }, + { name = "pytest" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/18/99/668cade231f434aaa59bbfbf49469068d2ddd945000621d3d165d2e7dd7b/pytest_cov-6.2.1.tar.gz", hash = "sha256:25cc6cc0a5358204b8108ecedc51a9b57b34cc6b8c967cc2c01a4e00d8a67da2", size = 69432 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bc/16/4ea354101abb1287856baa4af2732be351c7bee728065aed451b678153fd/pytest_cov-6.2.1-py3-none-any.whl", hash = "sha256:f5bc4c23f42f1cdd23c70b1dab1bbaef4fc505ba950d53e0081d0730dd7e86d5", size = 24644 }, +] + [[package]] name = "pytest-httpx" version = "0.35.0" @@ -369,6 +454,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b0/ed/026d467c1853dd83102411a78126b4842618e86c895f93528b0528c7a620/pytest_httpx-0.35.0-py3-none-any.whl", hash = "sha256:ee11a00ffcea94a5cbff47af2114d34c5b231c326902458deed73f9c459fd744", size = 19442 }, ] +[[package]] +name = "python-ulid" +version = "3.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/9a/db/e5e67aeca9c2420cb91f94007f30693cc3628ae9783a565fd33ffb3fbfdd/python_ulid-3.0.0.tar.gz", hash = "sha256:e50296a47dc8209d28629a22fc81ca26c00982c78934bd7766377ba37ea49a9f", size = 28822 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/63/4e/cc2ba2c0df2589f35a4db8473b8c2ba9bbfc4acdec4a94f1c78934d2350f/python_ulid-3.0.0-py3-none-any.whl", hash = "sha256:e4c4942ff50dbd79167ad01ac725ec58f924b4018025ce22c858bfcff99a5e31", size = 11194 }, +] + [[package]] name = "ruff" version = "0.11.13" @@ -462,12 +556,3 @@ sdist = { url = "https://files.pythonhosted.org/packages/f8/b1/0c11f5058406b3af7 wheels = [ { url = "https://files.pythonhosted.org/packages/17/69/cd203477f944c353c31bade965f880aa1061fd6bf05ded0726ca845b6ff7/typing_inspection-0.4.1-py3-none-any.whl", hash = "sha256:389055682238f53b04f7badcb49b989835495a96700ced5dab2d8feae4b26f51", size = 14552 }, ] - -[[package]] -name = "ulid-py" -version = "1.1.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/3b/53/d14a8ec344048e21431821cb49e9a6722384f982b889c2dd449428dbdcc1/ulid-py-1.1.0.tar.gz", hash = "sha256:dc6884be91558df077c3011b9fb0c87d1097cb8fc6534b11f310161afd5738f0", size = 22514 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/42/7c/a12c879fe6c2b136a718c142115ff99397fbf62b4929d970d58ae386d55f/ulid_py-1.1.0-py2.py3-none-any.whl", hash = "sha256:b56a0f809ef90d6020b21b89a87a48edc7c03aea80e5ed5174172e82d76e3987", size = 25753 }, -] diff --git a/agent_memory_server/api.py b/agent_memory_server/api.py index a7fe7dc..2f594ef 100644 --- a/agent_memory_server/api.py +++ b/agent_memory_server/api.py @@ -14,6 +14,7 @@ AckResponse, CreateMemoryRecordRequest, GetSessionsQuery, + MemoryMessage, MemoryPromptRequest, MemoryPromptResponse, MemoryRecordResultsResponse, @@ -34,67 +35,109 @@ router = APIRouter() -def _get_effective_window_size( - window_size: int, - context_window_max: int | None, +def _get_effective_token_limit( model_name: ModelNameLiteral | None, + context_window_max: int | None, ) -> int: + """Calculate the effective token limit for working memory based on model context window.""" # If context_window_max is explicitly provided, use that if 
context_window_max is not None: - effective_window_size = min(window_size, context_window_max) + return context_window_max # If model_name is provided, get its max_tokens from our config - elif model_name is not None: + if model_name is not None: model_config = get_model_config(model_name) - effective_window_size = min(window_size, model_config.max_tokens) - # Otherwise use the default window_size - else: - effective_window_size = window_size - return effective_window_size + return model_config.max_tokens + # Otherwise use a conservative default (GPT-3.5 context window) + return 16000 # Conservative default + + +def _calculate_messages_token_count(messages: list[MemoryMessage]) -> int: + """Calculate total token count for a list of messages.""" + encoding = tiktoken.get_encoding("cl100k_base") + total_tokens = 0 + + for msg in messages: + msg_str = f"{msg.role}: {msg.content}" + msg_tokens = len(encoding.encode(msg_str)) + total_tokens += msg_tokens + + return total_tokens async def _summarize_working_memory( memory: WorkingMemory, - window_size: int, + model_name: ModelNameLiteral | None = None, + context_window_max: int | None = None, model: str = settings.generation_model, ) -> WorkingMemory: """ - Summarize working memory when it exceeds the window size. + Summarize working memory when it exceeds token limits. Args: memory: The working memory to potentially summarize - window_size: Maximum number of messages to keep + model_name: The client's LLM model name for context window determination + context_window_max: Direct specification of context window max tokens model: The model to use for summarization Returns: Updated working memory with summary and trimmed messages """ - if len(memory.messages) <= window_size: + # Calculate current token usage + current_tokens = _calculate_messages_token_count(memory.messages) + + # Get effective token limit for the client's model + max_tokens = _get_effective_token_limit(model_name, context_window_max) + + # Reserve space for new messages, function calls, and response generation + # Use 70% of context window to leave room for new content + token_threshold = int(max_tokens * 0.7) + + if current_tokens <= token_threshold: return memory # Get model client for summarization client = await get_model_client(model) model_config = get_model_config(model) - max_tokens = model_config.max_tokens + summarization_max_tokens = model_config.max_tokens - # Token allocation (same logic as original summarize_session) - if max_tokens < 10000: - summary_max_tokens = max(512, max_tokens // 8) # 12.5% - elif max_tokens < 50000: - summary_max_tokens = max(1024, max_tokens // 10) # 10% + # Token allocation for summarization (same logic as original summarize_session) + if summarization_max_tokens < 10000: + summary_max_tokens = max(512, summarization_max_tokens // 8) # 12.5% + elif summarization_max_tokens < 50000: + summary_max_tokens = max(1024, summarization_max_tokens // 10) # 10% else: - summary_max_tokens = max(2048, max_tokens // 20) # 5% + summary_max_tokens = max(2048, summarization_max_tokens // 20) # 5% - buffer_tokens = min(max(230, max_tokens // 100), 1000) - max_message_tokens = max_tokens - summary_max_tokens - buffer_tokens + buffer_tokens = min(max(230, summarization_max_tokens // 100), 1000) + max_message_tokens = summarization_max_tokens - summary_max_tokens - buffer_tokens encoding = tiktoken.get_encoding("cl100k_base") total_tokens = 0 messages_to_summarize = [] - # Calculate how many messages from the beginning we should summarize - # Keep the most 
recent messages within window_size + # We want to keep recent messages that fit in our target token budget + target_remaining_tokens = int( + max_tokens * 0.4 + ) # Keep 40% of context for recent messages + + # Work backwards from the end to find how many recent messages we can keep + recent_messages_tokens = 0 + keep_count = 0 + + for i in range(len(memory.messages) - 1, -1, -1): + msg = memory.messages[i] + msg_str = f"{msg.role}: {msg.content}" + msg_tokens = len(encoding.encode(msg_str)) + + if recent_messages_tokens + msg_tokens <= target_remaining_tokens: + recent_messages_tokens += msg_tokens + keep_count += 1 + else: + break + + # Messages to summarize are the ones we're not keeping messages_to_check = ( - memory.messages[:-window_size] if len(memory.messages) > window_size else [] + memory.messages[:-keep_count] if keep_count > 0 else memory.messages[:-1] ) for msg in messages_to_check: @@ -125,12 +168,12 @@ async def _summarize_working_memory( ) # Update working memory with new summary and trimmed messages - # Keep only the most recent messages within window_size + # Keep only the most recent messages that fit in our token budget updated_memory = memory.model_copy(deep=True) updated_memory.context = summary - updated_memory.messages = memory.messages[ - -window_size: - ] # Keep most recent messages + updated_memory.messages = ( + memory.messages[-keep_count:] if keep_count > 0 else [memory.messages[-1]] + ) updated_memory.tokens = memory.tokens + summary_tokens_used return updated_memory @@ -166,10 +209,10 @@ async def list_sessions( @router.get("/v1/working-memory/{session_id}", response_model=WorkingMemoryResponse) -async def get_session_memory( +async def get_working_memory( session_id: str, namespace: str | None = None, - window_size: int = settings.window_size, + window_size: int = settings.window_size, # Deprecated: kept for backward compatibility model_name: ModelNameLiteral | None = None, context_window_max: int | None = None, current_user: UserInfo = Depends(get_current_user), @@ -178,11 +221,12 @@ async def get_session_memory( Get working memory for a session. This includes stored conversation messages, context, and structured memory records. + If the messages exceed the token limit, older messages will be truncated. 
Args: session_id: The session ID namespace: The namespace to use for the session - window_size: The number of messages to include in the response + window_size: DEPRECATED - The number of messages to include (kept for backward compatibility) model_name: The client's LLM model name (will determine context window size if provided) context_window_max: Direct specification of the context window max tokens (overrides model_name) @@ -190,11 +234,6 @@ async def get_session_memory( Working memory containing messages, context, and structured memory records """ redis = await get_redis_conn() - effective_window_size = _get_effective_window_size( - window_size=window_size, - context_window_max=context_window_max, - model_name=model_name, - ) # Get unified working memory working_mem = await working_memory.get_working_memory( @@ -212,33 +251,52 @@ async def get_session_memory( namespace=namespace, ) - # Apply window size to messages if needed - if len(working_mem.messages) > effective_window_size: - working_mem.messages = working_mem.messages[-effective_window_size:] + # Apply token-based truncation if we have messages and model info + if working_mem.messages and (model_name or context_window_max): + token_limit = _get_effective_token_limit(model_name, context_window_max) + current_token_count = _calculate_messages_token_count(working_mem.messages) + + # If we exceed the token limit, truncate from the beginning (keep recent messages) + if current_token_count > token_limit: + # Keep removing oldest messages until we're under the limit + truncated_messages = working_mem.messages[:] + while len(truncated_messages) > 1: # Always keep at least 1 message + truncated_messages = truncated_messages[1:] # Remove oldest + if _calculate_messages_token_count(truncated_messages) <= token_limit: + break + working_mem.messages = truncated_messages + + # Fallback to message-count truncation for backward compatibility + elif len(working_mem.messages) > window_size: + working_mem.messages = working_mem.messages[-window_size:] return working_mem @router.put("/v1/working-memory/{session_id}", response_model=WorkingMemoryResponse) -async def put_session_memory( +async def put_working_memory( session_id: str, memory: WorkingMemory, + model_name: ModelNameLiteral | None = None, + context_window_max: int | None = None, background_tasks=Depends(get_background_tasks), current_user: UserInfo = Depends(get_current_user), ): """ Set working memory for a session. Replaces existing working memory. - If the message count exceeds the window size, messages will be summarized + If the token count exceeds the context window threshold, messages will be summarized immediately and the updated memory state returned to the client. 
Args: session_id: The session ID memory: Working memory to save + model_name: The client's LLM model name for context window determination + context_window_max: Direct specification of context window max tokens background_tasks: DocketBackgroundTasks instance (injected automatically) Returns: - Updated working memory (potentially with summary if messages were condensed) + Updated working memory (potentially with summary if tokens were condensed) """ redis = await get_redis_conn() @@ -253,10 +311,12 @@ async def put_session_memory( detail="All memory records in working memory must have an ID", ) - # Handle summarization if needed (before storing) + # Handle summarization if needed (before storing) - now token-based updated_memory = memory - if memory.messages and len(memory.messages) > settings.window_size: - updated_memory = await _summarize_working_memory(memory, settings.window_size) + if memory.messages: + updated_memory = await _summarize_working_memory( + memory, model_name=model_name, context_window_max=context_window_max + ) await working_memory.set_working_memory( working_memory=updated_memory, @@ -297,7 +357,7 @@ async def put_session_memory( @router.delete("/v1/working-memory/{session_id}", response_model=AckResponse) -async def delete_session_memory( +async def delete_working_memory( session_id: str, namespace: str | None = None, current_user: UserInfo = Depends(get_current_user), @@ -482,11 +542,19 @@ async def memory_prompt( _messages = [] if params.session: - effective_window_size = _get_effective_window_size( - window_size=params.session.window_size, - context_window_max=params.session.context_window_max, - model_name=params.session.model_name, - ) + # Use token limit for memory prompt, fallback to message count for backward compatibility + if params.session.model_name or params.session.context_window_max: + token_limit = _get_effective_token_limit( + model_name=params.session.model_name, + context_window_max=params.session.context_window_max, + ) + effective_window_size = ( + token_limit # We'll handle token-based truncation below + ) + else: + effective_window_size = ( + params.session.window_size + ) # Fallback to message count working_mem = await working_memory.get_working_memory( session_id=params.session.session_id, namespace=params.session.namespace, @@ -504,12 +572,31 @@ async def memory_prompt( ), ) ) - # Apply window size and ignore past system messages as the latest context may have changed - recent_messages = ( - working_mem.messages[-effective_window_size:] - if len(working_mem.messages) > effective_window_size - else working_mem.messages - ) + # Apply token-based or message-based truncation + if params.session.model_name or params.session.context_window_max: + # Token-based truncation + if ( + _calculate_messages_token_count(working_mem.messages) + > effective_window_size + ): + # Keep removing oldest messages until we're under the limit + recent_messages = working_mem.messages[:] + while len(recent_messages) > 1: # Always keep at least 1 message + recent_messages = recent_messages[1:] # Remove oldest + if ( + _calculate_messages_token_count(recent_messages) + <= effective_window_size + ): + break + else: + recent_messages = working_mem.messages + else: + # Message-based truncation (backward compatibility) + recent_messages = ( + working_mem.messages[-effective_window_size:] + if len(working_mem.messages) > effective_window_size + else working_mem.messages + ) for msg in recent_messages: if msg.role == "user": msg_class = base.UserMessage diff --git 
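The working-memory endpoints in the hunks above now reason about context in tokens rather than message counts: `_calculate_messages_token_count` encodes each message as `role: content` with tiktoken's `cl100k_base` encoding, `_summarize_working_memory` kicks in once the history crosses roughly 70% of the effective context window, and `get_working_memory` drops the oldest messages one at a time until the remainder fits. The sketch below illustrates that truncation strategy in isolation; `Message`, the helper names, and the sample numbers are illustrative stand-ins, not code or configuration taken from the server.

```python
# Minimal sketch of the token-based truncation idea, assuming only tiktoken.
# `Message` stands in for the server's MemoryMessage model; the thresholds and
# sample limit below are illustrative, not values from the patch.
from dataclasses import dataclass

import tiktoken


@dataclass
class Message:
    role: str
    content: str


def count_message_tokens(messages: list[Message]) -> int:
    """Count tokens by encoding each message as 'role: content'."""
    encoding = tiktoken.get_encoding("cl100k_base")
    return sum(len(encoding.encode(f"{m.role}: {m.content}")) for m in messages)


def needs_summarization(messages: list[Message], context_window: int) -> bool:
    """Mirror the ~70% threshold idea: leave room for new turns and the response."""
    return count_message_tokens(messages) > int(context_window * 0.7)


def truncate_to_token_limit(messages: list[Message], token_limit: int) -> list[Message]:
    """Drop the oldest messages until the history fits, always keeping at least one."""
    kept = list(messages)
    while len(kept) > 1 and count_message_tokens(kept) > token_limit:
        kept = kept[1:]  # oldest message goes first
    return kept


if __name__ == "__main__":
    history = [
        Message("user", "I want to plan a two-week trip to Japan."),
        Message("assistant", "Great! Do you prefer cities, nature, or a mix?"),
        Message("user", "A mix, and I love food markets."),
    ]
    print(needs_summarization(history, context_window=60))
    # A deliberately tiny limit so the truncation is visible.
    print([m.content for m in truncate_to_token_limit(history, token_limit=30)])
```

Dropping from the oldest end keeps the most recent turns intact, which matches the summarization path's choice to reserve a portion of the window for recent messages and summarize only the older remainder.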
a/agent_memory_server/config.py b/agent_memory_server/config.py index 1b8fe3b..e5197b5 100644 --- a/agent_memory_server/config.py +++ b/agent_memory_server/config.py @@ -56,7 +56,7 @@ class Settings(BaseSettings): use_docket: bool = True # OAuth2/JWT Authentication settings - disable_auth: bool = False + disable_auth: bool = True oauth2_issuer_url: str | None = None oauth2_audience: str | None = None oauth2_jwks_url: str | None = None @@ -72,6 +72,7 @@ class Settings(BaseSettings): class Config: env_file = ".env" env_file_encoding = "utf-8" + extra = "ignore" # Ignore extra fields in YAML/env # Load YAML config first, then let env vars override diff --git a/agent_memory_server/extraction.py b/agent_memory_server/extraction.py index 511c670..bed9682 100644 --- a/agent_memory_server/extraction.py +++ b/agent_memory_server/extraction.py @@ -9,7 +9,7 @@ from tenacity.asyncio import AsyncRetrying from tenacity.stop import stop_after_attempt from transformers import AutoModelForTokenClassification, AutoTokenizer, pipeline -from ulid import ULID +import ulid from agent_memory_server.config import settings from agent_memory_server.llms import ( @@ -333,7 +333,7 @@ async def extract_discrete_memories( if discrete_memories: long_term_memories = [ MemoryRecord( - id_=str(ULID()), + id_=str(ulid.new()), text=new_memory["text"], memory_type=new_memory.get("type", "episodic"), topics=new_memory.get("topics", []), diff --git a/agent_memory_server/mcp.py b/agent_memory_server/mcp.py index fcc35b9..9d171ff 100644 --- a/agent_memory_server/mcp.py +++ b/agent_memory_server/mcp.py @@ -3,12 +3,12 @@ from typing import Any from mcp.server.fastmcp import FastMCP as _FastMCPBase -from ulid import ULID +import ulid from agent_memory_server.api import ( create_long_term_memory as core_create_long_term_memory, memory_prompt as core_memory_prompt, - put_session_memory as core_put_session_memory, + put_working_memory as core_put_working_memory, search_long_term_memory as core_search_long_term_memory, ) from agent_memory_server.config import settings @@ -690,7 +690,7 @@ async def set_working_memory( # Handle both MemoryRecord objects and dict inputs if isinstance(memory, MemoryRecord): # Already a MemoryRecord object, ensure it has an ID - memory_id = memory.id or str(ULID()) + memory_id = memory.id or str(ulid.new()) processed_memory = memory.model_copy( update={ "id": memory_id, @@ -701,7 +701,7 @@ async def set_working_memory( # Dictionary input, convert to MemoryRecord memory_dict = dict(memory) if not memory_dict.get("id"): - memory_dict["id"] = str(ULID()) + memory_dict["id"] = str(ulid.new()) memory_dict["persisted_at"] = None processed_memory = MemoryRecord(**memory_dict) @@ -720,7 +720,7 @@ async def set_working_memory( ) # Update working memory via the API - this handles summarization and background promotion - result = await core_put_session_memory( + result = await core_put_working_memory( session_id=session_id, memory=working_memory_obj, background_tasks=get_background_tasks(), diff --git a/agent_memory_server/migrations.py b/agent_memory_server/migrations.py index 70e5f30..b2fe381 100644 --- a/agent_memory_server/migrations.py +++ b/agent_memory_server/migrations.py @@ -3,7 +3,7 @@ """ from redis.asyncio import Redis -from ulid import ULID +import ulid from agent_memory_server.logging import get_logger from agent_memory_server.long_term_memory import generate_memory_hash @@ -98,7 +98,7 @@ async def migrate_add_discrete_memory_extracted_2(redis: Redis | None = None) -> id_ = await redis.hget(name=key, 
key="id_") # type: ignore if not id_: logger.info("Updating memory with no ID to set ID") - await redis.hset(name=key, key="id_", value=str(ULID())) # type: ignore + await redis.hset(name=key, key="id_", value=str(ulid.new())) # type: ignore # extracted: bytes | None = await redis.hget( # name=key, key="discrete_memory_extracted" # ) # type: ignore @@ -126,7 +126,7 @@ async def migrate_add_memory_type_3(redis: Redis | None = None) -> None: id_ = await redis.hget(name=key, key="id_") # type: ignore if not id_: logger.info("Updating memory with no ID to set ID") - await redis.hset(name=key, key="id_", value=str(ULID())) # type: ignore + await redis.hset(name=key, key="id_", value=str(ulid.new())) # type: ignore memory_type: bytes | None = await redis.hget(name=key, key="memory_type") # type: ignore if not memory_type: await redis.hset(name=key, key="memory_type", value="message") # type: ignore diff --git a/agent_memory_server/utils/redis.py b/agent_memory_server/utils/redis.py index c29ac41..185a558 100644 --- a/agent_memory_server/utils/redis.py +++ b/agent_memory_server/utils/redis.py @@ -88,7 +88,7 @@ async def ensure_search_index_exists( index_name: str = settings.redisvl_index_name, vector_dimensions: str = settings.redisvl_vector_dimensions, distance_metric: str = settings.redisvl_distance_metric, - overwrite: bool = False, + overwrite: bool = True, ) -> None: """ Ensure that the async search index exists, create it if it doesn't. diff --git a/examples/README.md b/examples/README.md new file mode 100644 index 0000000..8def895 --- /dev/null +++ b/examples/README.md @@ -0,0 +1,51 @@ +# Examples + +This directory contains example implementations showing how to use the Agent Memory Server. + +## Travel Agent (`travel_agent.py`) + +A comprehensive travel assistant that demonstrates: + +### Core Features +- **Automatic Tool Discovery**: Uses `MemoryAPIClient.get_all_memory_tool_schemas()` to automatically discover and integrate all available memory tools +- **Unified Tool Resolution**: Leverages `client.resolve_tool_call()` to handle all memory tool calls uniformly across different LLM providers +- **Working Memory Management**: Session-based conversation state and structured memory storage +- **Long-term Memory**: Persistent memory storage and semantic search capabilities +- **Optional Web Search**: Cached web search using Tavily API with Redis caching + +### Available Tools +The travel agent automatically discovers and uses all memory tools available from the client: + +1. **search_memory** - Search through previous conversations and stored information +2. **get_working_memory** - Check current session state, stored memories, and data +3. **add_memory_to_working_memory** - Store important information as structured memories +4. 
**update_working_memory_data** - Store/update session-specific data like trip plans + +Plus optional: +- **web_search** - Search the internet for current travel information (requires TAVILY_API_KEY) + +### Usage + +```bash +# Basic usage +python travel_agent.py + +# With custom session +python travel_agent.py --session-id my_trip --user-id john_doe + +# With custom memory server +python travel_agent.py --memory-server-url http://localhost:8001 +``` + +### Environment Variables +- `OPENAI_API_KEY` - Required for OpenAI ChatGPT +- `TAVILY_API_KEY` - Optional for web search functionality +- `MEMORY_SERVER_URL` - Memory server URL (default: http://localhost:8000) +- `REDIS_URL` - Redis URL for caching (default: redis://localhost:6379) + +### Key Implementation Details +- **Tool Auto-Discovery**: Uses the client's built-in tool management for maximum compatibility +- **Provider Agnostic**: Tool resolution works with OpenAI, Anthropic, and other LLM providers +- **Error Handling**: Robust error handling for tool calls and network issues +- **Logging**: Comprehensive logging shows which tools are available and being used + diff --git a/examples/travel_agent.ipynb b/examples/travel_agent.ipynb index d7490cb..290d708 100644 --- a/examples/travel_agent.ipynb +++ b/examples/travel_agent.ipynb @@ -1,1602 +1,1601 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "![Redis](https://redis.io/wp-content/uploads/2024/04/Logotype.svg?auto=webp&quality=85,75&width=120)\n", - "\n", - "# [WIP] Agent Memory Using Agent Memory Server\n", - "This notebook demonstrates how to manage short-term and long-term agent memory using Redis and [Agent Memory Server](https://github.com/redis-developer/agent-memory-server). We'll explore:\n", - "\n", - "1. Working memory management\n", - "2. Long-term memory storage and retrieval\n", - "3. Managing memory manually vs. exposing tool access\n", - "4. Managing conversation history size with summarization\n", - "5. Consolidating similar/duplicate long-term memories\n", - "\n", - "## What We'll Build\n", - "\n", - "We're going to build two versions of a travel agent, one that manages long-term\n", - "memory manually and one that does so using tools the LLM calls.\n", - "\n", - "## Let's Begin!\n", - "\"Open" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Setup\n", - "\n", - "### Packages" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%pip install -q tavily-python pydantic agent-memory-server" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Required API Keys\n", - "\n", - "You must add an OpenAI API key with billing information for this lesson. You will also need\n", - "a Tavily API key. Tavily API keys come with free credits at the time of this writing." 
- ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [], - "source": [ - "# NBVAL_SKIP\n", - "import getpass\n", - "import os\n", - "\n", - "\n", - "def _set_env(key: str):\n", - " if key not in os.environ:\n", - " os.environ[key] = getpass.getpass(f\"{key}:\")\n", - "\n", - "\n", - "_set_env(\"OPENAI_API_KEY\")\n", - "\n", - "# Uncomment this if you have a Tavily API key and want to\n", - "# use the web search tool.\n", - "# _set_env(\"TAVILY_API_KEY\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Run redis\n", - "\n", - "### For colab\n", - "\n", - "Convert the following cell to Python to run it in Colab." - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "%%sh\n", - "# Exit if this is not running in Colab\n", - "if [ -z \"$COLAB_RELEASE_TAG\" ]; then\n", - " exit 0\n", - "fi\n", - "\n", - "curl -fsSL https://packages.redis.io/gpg | sudo gpg --dearmor -o /usr/share/keyrings/redis-archive-keyring.gpg\n", - "echo \"deb [signed-by=/usr/share/keyrings/redis-archive-keyring.gpg] https://packages.redis.io/deb $(lsb_release -cs) main\" | sudo tee /etc/apt/sources.list.d/redis.list\n", - "sudo apt-get update > /dev/null 2>&1\n", - "sudo apt-get install redis-stack-server > /dev/null 2>&1\n", - "redis-stack-server --daemonize yes" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### For Alternative Environments\n", - "There are many ways to get the necessary redis-stack instance running\n", - "1. On cloud, deploy a [FREE instance of Redis in the cloud](https://redis.com/try-free/). Or, if you have your\n", - "own version of Redis Enterprise running, that works too!\n", - "2. Per OS, [see the docs](https://redis.io/docs/latest/operate/oss_and_stack/install/install-stack/)\n", - "3. With docker: `docker run -d --name redis -p 6379:6379 redis:8.1`\n", - "\n", - "## Test connection" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "from redis import Redis\n", - "\n", - "# Use the environment variable if set, otherwise default to localhost\n", - "REDIS_URL = os.getenv(\"REDIS_URL\", \"redis://localhost:6379\")\n", - "\n", - "redis_client = Redis.from_url(REDIS_URL)\n", - "redis_client.ping()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Short-Term vs. Long-Term Memory\n", - "\n", - "The agent uses **short-term memory** and **long-term memory**. The implementations\n", - "of short-term and long-term memory differ, as does how the agent uses them. Let's\n", - "dig into the details. We'll return to code soon!\n", - "\n", - "### Short-Term Memory\n", - "\n", - "For short-term memory, the agent keeps track of conversation history with Redis.\n", - "Because this is a LangGraph agent, we use the `RedisSaver` class to achieve\n", - "this. `RedisSaver` is what LangGraph refers to as a _checkpointer_. You can read\n", - "more about checkpointers in the [LangGraph\n", - "documentation](https://langchain-ai.github.io/langgraph/concepts/persistence/).\n", - "In short, they store state for each node in the graph, which for this agent\n", - "includes conversation history.\n", - "\n", - "Here's a diagram showing how the agent uses Redis for short-term memory. Each node\n", - "in the graph (Retrieve Users, Respond, Summarize Conversation) persists its \"state\"\n", - "to Redis. 
The state object contains the agent's message conversation history for\n", - "the current thread.\n", - "\n", - "\n", - "\n", - "If Redis persistence is on, then Redis will persist short-term memory to\n", - "disk. This means if you quit the agent and return with the same thread ID and\n", - "user ID, you'll resume the same conversation.\n", - "\n", - "Conversation histories can grow long and pollute an LLM's context window. To manage\n", - "this, after every \"turn\" of a conversation, the agent summarizes messages when the\n", - "conversation grows past a configurable threshold. Checkpointers do not do this by\n", - "default, so we've created a node in the graph for summarization.\n", - "\n", - "**NOTE**: We'll see example code for the summarization node later in this notebook.\n", - "\n", - "### Long-Term Memory\n", - "\n", - "Aside from conversation history, the agent stores long-term memories in a search\n", - "index in Redis, using [RedisVL](https://docs.redisvl.com/en/latest/). Here's a\n", - "diagram showing the components involved:\n", - "\n", - "\n", - "\n", - "The agent tracks two types of long-term memories:\n", - "\n", - "- **Episodic**: User-specific experiences and preferences\n", - "- **Semantic**: General knowledge about travel destinations and requirements\n", - "\n", - "**NOTE** If you're familiar with the [CoALA\n", - "paper](https://arxiv.org/abs/2309.02427), the terms \"episodic\" and \"semantic\"\n", - "here map to the same concepts in the paper. CoALA discusses a third type of\n", - "memory, _procedural_. In our example, we consider logic encoded in Python in the\n", - "agent codebase to be its procedural memory.\n", - "\n", - "### Representing Long-Term Memory in Python\n", - "We use a couple of Pydantic models to represent long-term memories, both before\n", - "and after they're stored in Redis:" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [], - "source": [ - "from datetime import datetime\n", - "from enum import Enum\n", - "from typing import List, Optional\n", - "\n", - "from pydantic import BaseModel, Field\n", - "import ulid\n", - "\n", - "\n", - "class MemoryType(str, Enum):\n", - " \"\"\"\n", - " The type of a long-term memory.\n", - "\n", - " EPISODIC: User specific experiences and preferences\n", - "\n", - " SEMANTIC: General knowledge on top of the user's preferences and LLM's\n", - " training data.\n", - " \"\"\"\n", - "\n", - " EPISODIC = \"episodic\"\n", - " SEMANTIC = \"semantic\"\n", - "\n", - "\n", - "class Memory(BaseModel):\n", - " \"\"\"Represents a single long-term memory.\"\"\"\n", - "\n", - " content: str\n", - " memory_type: MemoryType\n", - " metadata: str\n", - " \n", - " \n", - "class Memories(BaseModel):\n", - " \"\"\"\n", - " A list of memories extracted from a conversation by an LLM.\n", - "\n", - " NOTE: OpenAI's structured output requires us to wrap the list in an object.\n", - " \"\"\"\n", - "\n", - " memories: List[Memory]\n", - "\n", - "\n", - "class StoredMemory(Memory):\n", - " \"\"\"A stored long-term memory\"\"\"\n", - "\n", - " id: str # The redis key\n", - " memory_id: ulid.ULID = Field(default_factory=lambda: ulid.ULID())\n", - " created_at: datetime = Field(default_factory=datetime.now)\n", - " user_id: Optional[str] = None\n", - " thread_id: Optional[str] = None\n", - " memory_type: Optional[MemoryType] = None\n", - " \n", - " \n", - "class MemoryStrategy(str, Enum):\n", - " \"\"\"\n", - " Supported strategies for managing long-term memory.\n", - " \n", - " This notebook 
supports two strategies for working with long-term memory:\n", - "\n", - " TOOLS: The LLM decides when to store and retrieve long-term memories, using\n", - " tools (AKA, function-calling) to do so.\n", - "\n", - " MANUAL: The agent manually retrieves long-term memories relevant to the\n", - " current conversation before sending every message and analyzes every\n", - " response to extract memories to store.\n", - "\n", - " NOTE: In both cases, the agent runs a background thread to consolidate\n", - " memories, and a workflow step to summarize conversations after the history\n", - " grows past a threshold.\n", - " \"\"\"\n", - "\n", - " TOOLS = \"tools\"\n", - " MANUAL = \"manual\"\n", - " \n", - " \n", - "# By default, we'll use the manual strategy\n", - "memory_strategy = MemoryStrategy.MANUAL" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We'll return to these models soon to see them in action!" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Short-Term Memory Storage and Retrieval\n", - "\n", - "The `RedisSaver` class handles the basics of short-term memory storage for us,\n", - "so we don't need to do anything here." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Long-Term Memory Storage and Retrieval\n", - "\n", - "We use RedisVL to store and retrieve long-term memories with vector embeddings.\n", - "This allows for semantic search of past experiences and knowledge.\n", - "\n", - "Let's set up a new search index to store and query memories:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from redisvl.index import SearchIndex\n", - "from redisvl.schema.schema import IndexSchema\n", - "\n", - "# Define schema for long-term memory index\n", - "memory_schema = IndexSchema.from_dict({\n", - " \"index\": {\n", - " \"name\": \"agent_memories\",\n", - " \"prefix\": \"memory:\",\n", - " \"key_separator\": \":\",\n", - " \"storage_type\": \"json\",\n", - " },\n", - " \"fields\": [\n", - " {\"name\": \"content\", \"type\": \"text\"},\n", - " {\"name\": \"memory_type\", \"type\": \"tag\"},\n", - " {\"name\": \"metadata\", \"type\": \"text\"},\n", - " {\"name\": \"created_at\", \"type\": \"text\"},\n", - " {\"name\": \"user_id\", \"type\": \"tag\"},\n", - " {\"name\": \"memory_id\", \"type\": \"tag\"},\n", - " {\n", - " \"name\": \"embedding\",\n", - " \"type\": \"vector\",\n", - " \"attrs\": {\n", - " \"algorithm\": \"flat\",\n", - " \"dims\": 1536, # OpenAI embedding dimension\n", - " \"distance_metric\": \"cosine\",\n", - " \"datatype\": \"float32\",\n", - " },\n", - " },\n", - " ],\n", - " }\n", - ")\n", - "\n", - "# Create search index\n", - "try:\n", - " long_term_memory_index = SearchIndex(\n", - " schema=memory_schema, redis_client=redis_client, overwrite=True\n", - " )\n", - " long_term_memory_index.create()\n", - " print(\"Long-term memory index ready\")\n", - "except Exception as e:\n", - " print(f\"Error creating index: {e}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Storage and Retrieval Functions\n", - "\n", - "Now that we have a search index in Redis, we can write functions to store and\n", - "retrieve memories. We can use RedisVL to write these.\n", - "\n", - "First, we'll write a utility function to check if a memory similar to a given\n", - "memory already exists in the index. 
Later, we can use this to avoid storing\n", - "duplicate memories.\n", - "\n", - "#### Checking for Similar Memories" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import logging\n", - "\n", - "from redisvl.query import VectorRangeQuery\n", - "from redisvl.query.filter import Tag\n", - "from redisvl.utils.vectorize.text.openai import OpenAITextVectorizer\n", - "\n", - "\n", - "logger = logging.getLogger(__name__)\n", - "\n", - "# If we have any memories that aren't associated with a user, we'll use this ID.\n", - "SYSTEM_USER_ID = \"system\"\n", - "\n", - "openai_embed = OpenAITextVectorizer(model=\"text-embedding-ada-002\")\n", - "\n", - "# Change this to MemoryStrategy.TOOLS to use function-calling to store and\n", - "# retrieve memories.\n", - "memory_strategy = MemoryStrategy.MANUAL\n", - "\n", - "\n", - "def similar_memory_exists(\n", - " content: str,\n", - " memory_type: MemoryType,\n", - " user_id: str = SYSTEM_USER_ID,\n", - " thread_id: Optional[str] = None,\n", - " distance_threshold: float = 0.1,\n", - ") -> bool:\n", - " \"\"\"Check if a similar long-term memory already exists in Redis.\"\"\"\n", - " query_embedding = openai_embed.embed(content)\n", - " filters = (Tag(\"user_id\") == user_id) & (Tag(\"memory_type\") == memory_type)\n", - " if thread_id:\n", - " filters = filters & (Tag(\"thread_id\") == thread_id)\n", - "\n", - " # Search for similar memories\n", - " vector_query = VectorRangeQuery(\n", - " vector=query_embedding,\n", - " num_results=1,\n", - " vector_field_name=\"embedding\",\n", - " filter_expression=filters,\n", - " distance_threshold=distance_threshold,\n", - " return_fields=[\"id\"],\n", - " )\n", - " results = long_term_memory_index.query(vector_query)\n", - " logger.debug(f\"Similar memory search results: {results}\")\n", - "\n", - " if results:\n", - " logger.debug(\n", - " f\"{len(results)} similar {'memory' if results.count == 1 else 'memories'} found. First: \"\n", - " f\"{results[0]['id']}. 
Skipping storage.\"\n", - " )\n", - " return True\n", - "\n", - " return False\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Storing and Retrieving Long-Term Memories" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We'll use the `similar_memory_exists()` function when we store memories:" - ] - }, - { - "cell_type": "code", - "execution_count": 89, - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "from datetime import datetime\n", - "from typing import List, Optional, Union\n", - "\n", - "import ulid\n", - "\n", - "\n", - "def store_memory(\n", - " content: str,\n", - " memory_type: MemoryType,\n", - " user_id: str = SYSTEM_USER_ID,\n", - " thread_id: Optional[str] = None,\n", - " metadata: Optional[str] = None,\n", - "):\n", - " \"\"\"Store a long-term memory in Redis, avoiding duplicates.\"\"\"\n", - " if metadata is None:\n", - " metadata = \"{}\"\n", - "\n", - " logger.info(f\"Preparing to store memory: {content}\")\n", - "\n", - " if similar_memory_exists(content, memory_type, user_id, thread_id):\n", - " logger.info(\"Similar memory found, skipping storage\")\n", - " return\n", - "\n", - " embedding = openai_embed.embed(content)\n", - "\n", - " memory_data = {\n", - " \"user_id\": user_id or SYSTEM_USER_ID,\n", - " \"content\": content,\n", - " \"memory_type\": memory_type.value,\n", - " \"metadata\": metadata,\n", - " \"created_at\": datetime.now().isoformat(),\n", - " \"embedding\": embedding,\n", - " \"memory_id\": str(ulid.ULID()),\n", - " \"thread_id\": thread_id,\n", - " }\n", - "\n", - " try:\n", - " long_term_memory_index.load([memory_data])\n", - " except Exception as e:\n", - " logger.error(f\"Error storing memory: {e}\")\n", - " return\n", - "\n", - " logger.info(f\"Stored {memory_type} memory: {content}\")\n", - " \n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "And now that we're storing memories, we can retrieve them:" - ] - }, - { - "cell_type": "code", - "execution_count": 90, - "metadata": {}, - "outputs": [], - "source": [ - "def retrieve_memories(\n", - " query: str,\n", - " memory_type: Union[Optional[MemoryType], List[MemoryType]] = None,\n", - " user_id: str = SYSTEM_USER_ID,\n", - " thread_id: Optional[str] = None,\n", - " distance_threshold: float = 0.1,\n", - " limit: int = 5,\n", - ") -> List[StoredMemory]:\n", - " \"\"\"Retrieve relevant memories from Redis\"\"\"\n", - " # Create vector query\n", - " logger.debug(f\"Retrieving memories for query: {query}\")\n", - " vector_query = VectorRangeQuery(\n", - " vector=openai_embed.embed(query),\n", - " return_fields=[\n", - " \"content\",\n", - " \"memory_type\",\n", - " \"metadata\",\n", - " \"created_at\",\n", - " \"memory_id\",\n", - " \"thread_id\",\n", - " \"user_id\",\n", - " ],\n", - " num_results=limit,\n", - " vector_field_name=\"embedding\",\n", - " dialect=2,\n", - " distance_threshold=distance_threshold,\n", - " )\n", - "\n", - " base_filters = [f\"@user_id:{{{user_id or SYSTEM_USER_ID}}}\"]\n", - "\n", - " if memory_type:\n", - " if isinstance(memory_type, list):\n", - " base_filters.append(f\"@memory_type:{{{'|'.join(memory_type)}}}\")\n", - " else:\n", - " base_filters.append(f\"@memory_type:{{{memory_type.value}}}\")\n", - "\n", - " if thread_id:\n", - " base_filters.append(f\"@thread_id:{{{thread_id}}}\")\n", - "\n", - " vector_query.set_filter(\" \".join(base_filters))\n", - "\n", - " # Execute search\n", - " results = long_term_memory_index.query(vector_query)\n", - "\n", - " # Parse 
results\n", - " memories = []\n", - " for doc in results:\n", - " try:\n", - " memory = StoredMemory(\n", - " id=doc[\"id\"],\n", - " memory_id=doc[\"memory_id\"],\n", - " user_id=doc[\"user_id\"],\n", - " thread_id=doc.get(\"thread_id\", None),\n", - " memory_type=MemoryType(doc[\"memory_type\"]),\n", - " content=doc[\"content\"],\n", - " created_at=doc[\"created_at\"],\n", - " metadata=doc[\"metadata\"],\n", - " )\n", - " memories.append(memory)\n", - " except Exception as e:\n", - " logger.error(f\"Error parsing memory: {e}\")\n", - " continue\n", - " return memories" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Managing Long-Term Memory Manually vs. Calling Tools\n", - "\n", - "While making LLM queries, agents can store and retrieve relevant long-term\n", - "memories in one of two ways (and more, but these are the two we'll discuss):\n", - "\n", - "1. Expose memory retrieval and storage as \"tools\" that the LLM can decide to call contextually.\n", - "2. Manually augment prompts with relevant memories, and manually extract and store relevant memories.\n", - "\n", - "These approaches both have tradeoffs.\n", - "\n", - "**Tool-calling** leaves the decision to store a memory or find relevant memories\n", - "up to the LLM. This can add latency to requests. It will generally result in\n", - "fewer calls to Redis but will also sometimes miss out on retrieving potentially\n", - "relevant context and/or extracting relevant memories from a conversation.\n", - "\n", - "**Manual memory management** will result in more calls to Redis but will produce\n", - "fewer round-trip LLM requests, reducing latency. Manually extracting memories\n", - "will generally extract more memories than tool calls, which will store more data\n", - "in Redis and should result in more context added to LLM requests. More context\n", - "means more contextual awareness but also higher token spend.\n", - "\n", - "You can test both approaches with this agent by changing the `memory_strategy`\n", - "variable.\n", - "\n", - "## Managing Memory Manually\n", - "With the manual memory management strategy, we're going to extract memories after\n", - "every interaction between the user and the agent. 
We're then going to retrieve\n", - "those memories during future interactions before we send the query.\n", - "\n", - "### Extracting Memories\n", - "We'll call this `extract_memories` function manually after each interaction:" - ] - }, - { - "cell_type": "code", - "execution_count": 91, - "metadata": {}, - "outputs": [], - "source": [ - "from langchain_core.messages import HumanMessage\n", - "from langchain_core.runnables.config import RunnableConfig\n", - "from langchain_openai import ChatOpenAI\n", - "from langgraph.graph.message import MessagesState\n", - "\n", - "\n", - "class RuntimeState(MessagesState):\n", - " \"\"\"Agent state (just messages for now)\"\"\"\n", - "\n", - " pass\n", - "\n", - "\n", - "memory_llm = ChatOpenAI(model=\"gpt-4o\", temperature=0.3).with_structured_output(\n", - " Memories\n", - ")\n", - "\n", - "\n", - "def extract_memories(\n", - " last_processed_message_id: Optional[str],\n", - " state: RuntimeState,\n", - " config: RunnableConfig,\n", - ") -> Optional[str]:\n", - " \"\"\"Extract and store memories in long-term memory\"\"\"\n", - " logger.debug(f\"Last message ID is: {last_processed_message_id}\")\n", - "\n", - " if len(state[\"messages\"]) < 3: # Need at least a user message and agent response\n", - " logger.debug(\"Not enough messages to extract memories\")\n", - " return last_processed_message_id\n", - "\n", - " user_id = config.get(\"configurable\", {}).get(\"user_id\", None)\n", - " if not user_id:\n", - " logger.warning(\"No user ID found in config when extracting memories\")\n", - " return last_processed_message_id\n", - "\n", - " # Get the messages\n", - " messages = state[\"messages\"]\n", - "\n", - " # Find the newest message ID (or None if no IDs)\n", - " newest_message_id = None\n", - " for msg in reversed(messages):\n", - " if hasattr(msg, \"id\") and msg.id:\n", - " newest_message_id = msg.id\n", - " break\n", - "\n", - " logger.debug(f\"Newest message ID is: {newest_message_id}\")\n", - "\n", - " # If we've already processed up to this message ID, skip\n", - " if (\n", - " last_processed_message_id\n", - " and newest_message_id\n", - " and last_processed_message_id == newest_message_id\n", - " ):\n", - " logger.debug(f\"Already processed messages up to ID {newest_message_id}\")\n", - " return last_processed_message_id\n", - "\n", - " # Find the index of the message with last_processed_message_id\n", - " start_index = 0\n", - " if last_processed_message_id:\n", - " for i, msg in enumerate(messages):\n", - " if hasattr(msg, \"id\") and msg.id == last_processed_message_id:\n", - " start_index = i + 1 # Start processing from the next message\n", - " break\n", - "\n", - " # Check if there are messages to process\n", - " if start_index >= len(messages):\n", - " logger.debug(\"No new messages to process since last processed message\")\n", - " return newest_message_id\n", - "\n", - " # Get only the messages after the last processed message\n", - " messages_to_process = messages[start_index:]\n", - "\n", - " # If there are not enough messages to process, include some context\n", - " if len(messages_to_process) < 3 and start_index > 0:\n", - " # Include up to 3 messages before the start_index for context\n", - " context_start = max(0, start_index - 3)\n", - " messages_to_process = messages[context_start:]\n", - "\n", - " # Format messages for the memory agent\n", - " message_history = \"\\n\".join(\n", - " [\n", - " f\"{'User' if isinstance(msg, HumanMessage) else 'Assistant'}: {msg.content}\"\n", - " for msg in messages_to_process\n", - " ]\n", - 
" )\n", - "\n", - " prompt = f\"\"\"\n", - " You are a long-memory manager. Your job is to analyze this message history\n", - " and extract information that might be useful in future conversations.\n", - " \n", - " Extract two types of memories:\n", - " 1. EPISODIC: Personal experiences and preferences specific to this user\n", - " Example: \"User prefers window seats\" or \"User had a bad experience in Paris\"\n", - " \n", - " 2. SEMANTIC: General facts and knowledge about travel that could be useful\n", - " Example: \"The best time to visit Japan is during cherry blossom season in April\"\n", - " \n", - " For each memory, provide:\n", - " - Type: The memory type (EPISODIC/SEMANTIC)\n", - " - Content: The actual information to store\n", - " - Metadata: Relevant tags and context (as JSON)\n", - " \n", - " IMPORTANT RULES:\n", - " 1. Only extract information that would be genuinely useful for future interactions.\n", - " 2. Do not extract procedural knowledge - that is handled by the system's built-in tools and prompts.\n", - " 3. You are a large language model, not a human - do not extract facts that you already know.\n", - " \n", - " Message history:\n", - " {message_history}\n", - " \n", - " Extracted memories:\n", - " \"\"\"\n", - "\n", - " memories_to_store: Memories = memory_llm.invoke([HumanMessage(content=prompt)]) # type: ignore\n", - "\n", - " # Store each extracted memory\n", - " for memory_data in memories_to_store.memories:\n", - " store_memory(\n", - " content=memory_data.content,\n", - " memory_type=memory_data.memory_type,\n", - " user_id=user_id,\n", - " metadata=memory_data.metadata,\n", - " )\n", - "\n", - " # Return data with the newest processed message ID\n", - " return newest_message_id" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We'll use this function in a background thread. We'll start the thread in manual\n", - "memory mode but not in tool mode, and we'll run it as a worker that pulls\n", - "message histories from a `Queue` to process:" - ] - }, - { - "cell_type": "code", - "execution_count": 92, - "metadata": {}, - "outputs": [], - "source": [ - "import time\n", - "from queue import Queue\n", - "\n", - "\n", - "DEFAULT_MEMORY_WORKER_INTERVAL = 5 * 60 # 5 minutes\n", - "DEFAULT_MEMORY_WORKER_BACKOFF_INTERVAL = 10 * 60 # 10 minutes\n", - "\n", - "\n", - "def memory_worker(\n", - " memory_queue: Queue,\n", - " user_id: str,\n", - " interval: int = DEFAULT_MEMORY_WORKER_INTERVAL,\n", - " backoff_interval: int = DEFAULT_MEMORY_WORKER_BACKOFF_INTERVAL,\n", - "):\n", - " \"\"\"Worker function that processes long-term memory extraction requests\"\"\"\n", - " key = f\"memory_worker:{user_id}:last_processed_message_id\"\n", - "\n", - " last_processed_message_id = redis_client.get(key)\n", - " logger.debug(f\"Last processed message ID: {last_processed_message_id}\")\n", - " last_processed_message_id = (\n", - " str(last_processed_message_id) if last_processed_message_id else None\n", - " )\n", - "\n", - " while True:\n", - " try:\n", - " # Get the next state and config from the queue (blocks until an item is available)\n", - " state, config = memory_queue.get()\n", - "\n", - " # Extract long-term memories from the conversation history\n", - " last_processed_message_id = extract_memories(\n", - " last_processed_message_id, state, config\n", - " )\n", - " logger.debug(\n", - " f\"Memory worker extracted memories. 
Last processed message ID: {last_processed_message_id}\"\n", - " )\n", - "\n", - " if last_processed_message_id:\n", - " logger.debug(\n", - " f\"Setting last processed message ID: {last_processed_message_id}\"\n", - " )\n", - " redis_client.set(key, last_processed_message_id)\n", - "\n", - " # Mark the task as done\n", - " memory_queue.task_done()\n", - " logger.debug(\"Memory extraction completed for queue item\")\n", - " # Wait before processing next item\n", - " time.sleep(interval)\n", - " except Exception as e:\n", - " # Wait before processing next item after an error\n", - " logger.exception(f\"Error in memory worker thread: {e}\")\n", - " time.sleep(backoff_interval)\n", - "\n", - "\n", - "# NOTE: We'll actually start the worker thread later, in the main loop." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Augmenting Queries with Relevant Memories\n", - "\n", - "For every user interaction with the agent, we'll query for relevant memories and\n", - "add them to the LLM prompt with `retrieve_relevant_memories()`.\n", - "\n", - "**NOTE:** We only run this node in the \"manual\" memory management strategy. If\n", - "using \"tools,\" the LLM will decide when to retrieve memories." - ] - }, - { - "cell_type": "code", - "execution_count": 93, - "metadata": {}, - "outputs": [], - "source": [ - "def retrieve_relevant_memories(\n", - " state: RuntimeState, config: RunnableConfig\n", - ") -> RuntimeState:\n", - " \"\"\"Retrieve relevant memories based on the current conversation.\"\"\"\n", - " if not state[\"messages\"]:\n", - " logger.debug(\"No messages in state\")\n", - " return state\n", - "\n", - " latest_message = state[\"messages\"][-1]\n", - " if not isinstance(latest_message, HumanMessage):\n", - " logger.debug(\"Latest message is not a HumanMessage: \", latest_message)\n", - " return state\n", - "\n", - " user_id = config.get(\"configurable\", {}).get(\"user_id\", SYSTEM_USER_ID)\n", - "\n", - " query = str(latest_message.content)\n", - " relevant_memories = retrieve_memories(\n", - " query=query,\n", - " memory_type=[MemoryType.EPISODIC, MemoryType.SEMANTIC],\n", - " limit=5,\n", - " user_id=user_id,\n", - " distance_threshold=0.3,\n", - " )\n", - "\n", - " logger.debug(f\"All relevant memories: {relevant_memories}\")\n", - "\n", - " # We'll augment the latest human message with the relevant memories.\n", - " if relevant_memories:\n", - " memory_context = \"\\n\\n### Relevant memories from previous conversations:\\n\"\n", - "\n", - " # Group by memory type\n", - " memory_types = {\n", - " MemoryType.EPISODIC: \"User Preferences & History\",\n", - " MemoryType.SEMANTIC: \"Travel Knowledge\",\n", - " }\n", - "\n", - " for mem_type, type_label in memory_types.items():\n", - " memories_of_type = [\n", - " m for m in relevant_memories if m.memory_type == mem_type\n", - " ]\n", - " if memories_of_type:\n", - " memory_context += f\"\\n**{type_label}**:\\n\"\n", - " for mem in memories_of_type:\n", - " memory_context += f\"- {mem.content}\\n\"\n", - "\n", - " augmented_message = HumanMessage(content=f\"{query}\\n{memory_context}\")\n", - " state[\"messages\"][-1] = augmented_message\n", - "\n", - " logger.debug(f\"Augmented message: {augmented_message.content}\")\n", - "\n", - " return state.copy()\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This is the first function we've seen that represents a **node** in the LangGraph\n", - "graph we'll build. 
As a node representation, this function receives a `state`\n", - "object containing the runtime state of the graph, which is where conversation\n", - "history resides. Its `config` parameter contains data like the user and thread\n", - "IDs.\n", - "\n", - "This will be the starting node in the graph we'll assemble later. When a user\n", - "invokes the graph with a message, the first thing we'll do (when using the\n", - "\"manual\" memory strategy) is augment that message with potentially related\n", - "memories." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Defining Tools\n", - "\n", - "Now that we have our storage functions defined, we can create **tools**. We'll\n", - "need these to set up our agent in a moment. These tools will only be used when\n", - "the agent is operating in \"tools\" memory management mode." - ] - }, - { - "cell_type": "code", - "execution_count": 94, - "metadata": {}, - "outputs": [], - "source": [ - "from langchain_core.tools import tool\n", - "from typing import Dict, Optional\n", - "\n", - "\n", - "@tool\n", - "def store_memory_tool(\n", - " content: str,\n", - " memory_type: MemoryType,\n", - " metadata: Optional[Dict[str, str]] = None,\n", - " config: Optional[RunnableConfig] = None,\n", - ") -> str:\n", - " \"\"\"\n", - " Store a long-term memory in the system.\n", - "\n", - " Use this tool to save important information about user preferences,\n", - " experiences, or general knowledge that might be useful in future\n", - " interactions.\n", - " \"\"\"\n", - " config = config or RunnableConfig()\n", - " user_id = config.get(\"user_id\", SYSTEM_USER_ID)\n", - " thread_id = config.get(\"thread_id\")\n", - "\n", - " try:\n", - " # Store in long-term memory\n", - " store_memory(\n", - " content=content,\n", - " memory_type=memory_type,\n", - " user_id=user_id,\n", - " thread_id=thread_id,\n", - " metadata=str(metadata) if metadata else None,\n", - " )\n", - "\n", - " return f\"Successfully stored {memory_type} memory: {content}\"\n", - " except Exception as e:\n", - " return f\"Error storing memory: {str(e)}\"\n", - "\n", - "\n", - "@tool\n", - "def retrieve_memories_tool(\n", - " query: str,\n", - " memory_type: List[MemoryType],\n", - " limit: int = 5,\n", - " config: Optional[RunnableConfig] = None,\n", - ") -> str:\n", - " \"\"\"\n", - " Retrieve long-term memories relevant to the query.\n", - "\n", - " Use this tool to access previously stored information about user\n", - " preferences, experiences, or general knowledge.\n", - " \"\"\"\n", - " config = config or RunnableConfig()\n", - " user_id = config.get(\"user_id\", SYSTEM_USER_ID)\n", - "\n", - " try:\n", - " # Get long-term memories\n", - " stored_memories = retrieve_memories(\n", - " query=query,\n", - " memory_type=memory_type,\n", - " user_id=user_id,\n", - " limit=limit,\n", - " distance_threshold=0.3,\n", - " )\n", - "\n", - " # Format the response\n", - " response = []\n", - "\n", - " if stored_memories:\n", - " response.append(\"Long-term memories:\")\n", - " for memory in stored_memories:\n", - " response.append(f\"- [{memory.memory_type}] {memory.content}\")\n", - "\n", - " return \"\\n\".join(response) if response else \"No relevant memories found.\"\n", - "\n", - " except Exception as e:\n", - " return f\"Error retrieving memories: {str(e)}\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Creating the Agent\n", - "\n", - "Because we're using different LLM objects configured for different purposes and\n", - "a prebuilt ReAct 
agent, we need a node that invokes the agent and returns the\n", - "response. But before we can invoke the agent, we need to set it up. This will\n", - "involve defining the tools the agent will need." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import json\n", - "from typing import Dict, List, Optional, Tuple, Union\n", - "\n", - "from langchain_community.tools.tavily_search import TavilySearchResults\n", - "from langchain_core.callbacks.manager import CallbackManagerForToolRun\n", - "from langchain_core.messages import AIMessage, AIMessageChunk, SystemMessage\n", - "from langgraph.prebuilt.chat_agent_executor import create_react_agent\n", - "from langgraph.checkpoint.redis import RedisSaver\n", - "\n", - "\n", - "class CachingTavilySearchResults(TavilySearchResults):\n", - " \"\"\"\n", - " An interface to Tavily search that caches results in Redis.\n", - " \n", - " Caching the results of the web search allows us to avoid rate limiting,\n", - " improve latency, and reduce costs.\n", - " \"\"\"\n", - "\n", - " def _run(\n", - " self,\n", - " query: str,\n", - " run_manager: Optional[CallbackManagerForToolRun] = None,\n", - " ) -> Tuple[Union[List[Dict[str, str]], str], Dict]:\n", - " \"\"\"Use the tool.\"\"\"\n", - " cache_key = f\"tavily_search:{query}\"\n", - " cached_result: Optional[str] = redis_client.get(cache_key) # type: ignore\n", - " if cached_result:\n", - " return json.loads(cached_result), {}\n", - " else:\n", - " result, raw_results = super()._run(query, run_manager)\n", - " redis_client.set(cache_key, json.dumps(result), ex=60 * 60)\n", - " return result, raw_results\n", - "\n", - "\n", - "# Create a checkpoint saver for short-term memory. This keeps track of the\n", - "# conversation history for each thread. Later, we'll continually summarize the\n", - "# conversation history to keep the context window manageable, while we also\n", - "# extract long-term memories from the conversation history to store in the\n", - "# long-term memory index.\n", - "redis_saver = RedisSaver(redis_client=redis_client)\n", - "redis_saver.setup()\n", - "\n", - "# Configure an LLM for the agent with a more creative temperature.\n", - "llm = ChatOpenAI(model=\"gpt-4o\", temperature=0.7)\n", - "\n", - "\n", - "# Uncomment these lines if you have a Tavily API key and want to use the web\n", - "# search tool. The agent is much more useful with this tool.\n", - "# web_search_tool = CachingTavilySearchResults(max_results=2)\n", - "# base_tools = [web_search_tool]\n", - "base_tools = []\n", - "\n", - "if memory_strategy == MemoryStrategy.TOOLS:\n", - " tools = base_tools + [store_memory_tool, retrieve_memories_tool]\n", - "elif memory_strategy == MemoryStrategy.MANUAL:\n", - " tools = base_tools\n", - "\n", - "\n", - "travel_agent = create_react_agent(\n", - " model=llm,\n", - " tools=tools,\n", - " checkpointer=redis_saver, # Short-term memory: the conversation history\n", - " prompt=SystemMessage(\n", - " content=\"\"\"\n", - " You are a travel assistant helping users plan their trips. You remember user preferences\n", - " and provide personalized recommendations based on past interactions.\n", - " \n", - " You have access to the following types of memory:\n", - " 1. Short-term memory: The current conversation thread\n", - " 2. 
Long-term memory: \n", - " - Episodic: User preferences and past trip experiences (e.g., \"User prefers window seats\")\n", - " - Semantic: General knowledge about travel destinations and requirements\n", - " \n", - " Your procedural knowledge (how to search, book flights, etc.) is built into your tools and prompts.\n", - " \n", - " Always be helpful, personal, and context-aware in your responses.\n", - " \"\"\"\n", - " ),\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Responding to the User\n", - "\n", - "Now we can write our node that invokes the agent and responds to the user:" - ] - }, - { - "cell_type": "code", - "execution_count": 96, - "metadata": {}, - "outputs": [], - "source": [ - "def respond_to_user(state: RuntimeState, config: RunnableConfig) -> RuntimeState:\n", - " \"\"\"Invoke the travel agent to generate a response.\"\"\"\n", - " human_messages = [m for m in state[\"messages\"] if isinstance(m, HumanMessage)]\n", - " if not human_messages:\n", - " logger.warning(\"No HumanMessage found in state\")\n", - " return state\n", - "\n", - " try:\n", - " for result in travel_agent.stream(\n", - " {\"messages\": state[\"messages\"]}, config=config, stream_mode=\"messages\"\n", - " ):\n", - " result_messages = result.get(\"messages\", [])\n", - "\n", - " ai_messages = [\n", - " m\n", - " for m in result_messages\n", - " if isinstance(m, AIMessage) or isinstance(m, AIMessageChunk)\n", - " ]\n", - " if ai_messages:\n", - " agent_response = ai_messages[-1]\n", - " # Append only the agent's response to the original state\n", - " state[\"messages\"].append(agent_response)\n", - "\n", - " except Exception as e:\n", - " logger.error(f\"Error invoking travel agent: {e}\")\n", - " agent_response = AIMessage(\n", - " content=\"I'm sorry, I encountered an error processing your request.\"\n", - " )\n", - " return state" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Summarizing Conversation History\n", - "\n", - "We've been focusing on long-term memory, but let's bounce back to short-term\n", - "memory for a moment. With `RedisSaver`, LangGraph will manage our message\n", - "history automatically. Still, the message history will continue to grow\n", - "indefinitely, until it overwhelms the LLM's token context window.\n", - "\n", - "To solve this problem, we'll add a node to the graph that summarizes the\n", - "conversation if it's grown past a threshold." - ] - }, - { - "cell_type": "code", - "execution_count": 97, - "metadata": {}, - "outputs": [], - "source": [ - "from langchain_core.messages import RemoveMessage\n", - "\n", - "# An LLM configured for summarization.\n", - "summarizer = ChatOpenAI(model=\"gpt-4o\", temperature=0.3)\n", - "\n", - "# The number of messages after which we'll summarize the conversation.\n", - "MESSAGE_SUMMARIZATION_THRESHOLD = 10\n", - "\n", - "\n", - "def summarize_conversation(\n", - " state: RuntimeState, config: RunnableConfig\n", - ") -> Optional[RuntimeState]:\n", - " \"\"\"\n", - " Summarize a list of messages into a concise summary to reduce context length\n", - " while preserving important information.\n", - " \"\"\"\n", - " messages = state[\"messages\"]\n", - " current_message_count = len(messages)\n", - " if current_message_count < MESSAGE_SUMMARIZATION_THRESHOLD:\n", - " logger.debug(f\"Not summarizing conversation: {current_message_count}\")\n", - " return state\n", - "\n", - " system_prompt = \"\"\"\n", - " You are a conversation summarizer. 
Create a concise summary of the previous\n", - " conversation between a user and a travel assistant.\n", - " \n", - " The summary should:\n", - " 1. Highlight key topics, preferences, and decisions\n", - " 2. Include any specific trip details (destinations, dates, preferences)\n", - " 3. Note any outstanding questions or topics that need follow-up\n", - " 4. Be concise but informative\n", - " \n", - " Format your summary as a brief narrative paragraph.\n", - " \"\"\"\n", - "\n", - " message_content = \"\\n\".join(\n", - " [\n", - " f\"{'User' if isinstance(msg, HumanMessage) else 'Assistant'}: {msg.content}\"\n", - " for msg in messages\n", - " ]\n", - " )\n", - "\n", - " # Invoke the summarizer\n", - " summary_messages = [\n", - " SystemMessage(content=system_prompt),\n", - " HumanMessage(\n", - " content=f\"Please summarize this conversation:\\n\\n{message_content}\"\n", - " ),\n", - " ]\n", - "\n", - " summary_response = summarizer.invoke(summary_messages)\n", - "\n", - " logger.info(f\"Summarized {len(messages)} messages into a conversation summary\")\n", - "\n", - " summary_message = SystemMessage(\n", - " content=f\"\"\"\n", - " Summary of the conversation so far:\n", - " \n", - " {summary_response.content}\n", - " \n", - " Please continue the conversation based on this summary and the recent messages.\n", - " \"\"\"\n", - " )\n", - " remove_messages = [\n", - " RemoveMessage(id=msg.id) for msg in messages if msg.id is not None\n", - " ]\n", - "\n", - " state[\"messages\"] = [ # type: ignore\n", - " *remove_messages,\n", - " summary_message,\n", - " state[\"messages\"][-1],\n", - " ]\n", - "\n", - " return state.copy()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Assembling the Graph\n", - "\n", - "It's time to assemble our graph!" - ] - }, - { - "cell_type": "code", - "execution_count": 98, - "metadata": {}, - "outputs": [], - "source": [ - "from langgraph.graph import StateGraph, END, START\n", - "\n", - "\n", - "workflow = StateGraph(RuntimeState)\n", - "\n", - "workflow.add_node(\"respond\", respond_to_user)\n", - "workflow.add_node(\"summarize_conversation\", summarize_conversation)\n", - "\n", - "if memory_strategy == MemoryStrategy.MANUAL:\n", - " # In manual memory mode, we'll retrieve relevant memories before\n", - " # responding to the user, and then augment the user's message with the\n", - " # relevant memories.\n", - " workflow.add_node(\"retrieve_memories\", retrieve_relevant_memories)\n", - " workflow.add_edge(START, \"retrieve_memories\")\n", - " workflow.add_edge(\"retrieve_memories\", \"respond\")\n", - "else:\n", - " # In tool-calling mode, we'll respond to the user and let the LLM\n", - " # decide when to retrieve and store memories, using tool calls.\n", - " workflow.add_edge(START, \"respond\")\n", - "\n", - "# Regardless of memory strategy, we'll summarize the conversation after\n", - "# responding to the user, to keep the context window manageable.\n", - "workflow.add_edge(\"respond\", \"summarize_conversation\")\n", - "workflow.add_edge(\"summarize_conversation\", END)\n", - "\n", - "# Finally, compile the graph.\n", - "graph = workflow.compile(checkpointer=redis_saver)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Consolidating Memories in a Background Thread\n", - "\n", - "We're almost ready to create the main loop that runs our graph. First, though,\n", - "let's create a worker that consolidates similar memories on a regular schedule,\n", - "using semantic search. 
We'll run the worker in a background thread later, in the\n", - "main loop." - ] - }, - { - "cell_type": "code", - "execution_count": 99, - "metadata": {}, - "outputs": [], - "source": [ - "from redisvl.query import FilterQuery\n", - "\n", - "\n", - "def consolidate_memories(user_id: str, batch_size: int = 10):\n", - " \"\"\"\n", - " Periodically merge similar long-term memories for a user.\n", - " \"\"\"\n", - " logger.info(f\"Starting memory consolidation for user {user_id}\")\n", - " \n", - " # For each memory type, consolidate separately\n", - "\n", - " for memory_type in MemoryType:\n", - " all_memories = []\n", - "\n", - " # Get all memories of this type for the user\n", - " of_type_for_user = (Tag(\"user_id\") == user_id) & (\n", - " Tag(\"memory_type\") == memory_type\n", - " )\n", - " filter_query = FilterQuery(filter_expression=of_type_for_user)\n", - " \n", - " for batch in long_term_memory_index.paginate(filter_query, page_size=batch_size):\n", - " all_memories.extend(batch)\n", - " \n", - " all_memories = long_term_memory_index.query(filter_query)\n", - " if not all_memories:\n", - " continue\n", - "\n", - " # Group similar memories\n", - " processed_ids = set()\n", - " for memory in all_memories:\n", - " if memory[\"id\"] in processed_ids:\n", - " continue\n", - "\n", - " memory_embedding = memory[\"embedding\"]\n", - " vector_query = VectorRangeQuery(\n", - " vector=memory_embedding,\n", - " num_results=10,\n", - " vector_field_name=\"embedding\",\n", - " filter_expression=of_type_for_user\n", - " & (Tag(\"memory_id\") != memory[\"memory_id\"]),\n", - " distance_threshold=0.1,\n", - " return_fields=[\n", - " \"content\",\n", - " \"metadata\",\n", - " ],\n", - " )\n", - " similar_memories = long_term_memory_index.query(vector_query)\n", - "\n", - " # If we found similar memories, consolidate them\n", - " if similar_memories:\n", - " combined_content = memory[\"content\"]\n", - " combined_metadata = memory[\"metadata\"]\n", - "\n", - " if combined_metadata:\n", - " try:\n", - " combined_metadata = json.loads(combined_metadata)\n", - " except Exception as e:\n", - " logger.error(f\"Error parsing metadata: {e}\")\n", - " combined_metadata = {}\n", - "\n", - " for similar in similar_memories:\n", - " # Merge the content of similar memories\n", - " combined_content += f\" {similar['content']}\"\n", - "\n", - " if similar[\"metadata\"]:\n", - " try:\n", - " similar_metadata = json.loads(similar[\"metadata\"])\n", - " except Exception as e:\n", - " logger.error(f\"Error parsing metadata: {e}\")\n", - " similar_metadata = {}\n", - "\n", - " combined_metadata = {**combined_metadata, **similar_metadata}\n", - "\n", - " # Create a consolidated memory\n", - " new_metadata = {\n", - " \"consolidated\": True,\n", - " \"source_count\": len(similar_memories) + 1,\n", - " **combined_metadata,\n", - " }\n", - " consolidated_memory = {\n", - " \"content\": summarize_memories(combined_content, memory_type),\n", - " \"memory_type\": memory_type.value,\n", - " \"metadata\": json.dumps(new_metadata),\n", - " \"user_id\": user_id,\n", - " }\n", - "\n", - " # Delete the old memories\n", - " delete_memory(memory[\"id\"])\n", - " for similar in similar_memories:\n", - " delete_memory(similar[\"id\"])\n", - "\n", - " # Store the new consolidated memory\n", - " store_memory(\n", - " content=consolidated_memory[\"content\"],\n", - " memory_type=memory_type,\n", - " user_id=user_id,\n", - " metadata=consolidated_memory[\"metadata\"],\n", - " )\n", - "\n", - " logger.info(\n", - " f\"Consolidated 
{len(similar_memories) + 1} memories into one\"\n", - " )\n", - "\n", - "\n", - "def delete_memory(memory_id: str):\n", - " \"\"\"Delete a memory from Redis\"\"\"\n", - " try:\n", - " result = long_term_memory_index.drop_keys([memory_id])\n", - " except Exception as e:\n", - " logger.error(f\"Deleting memory {memory_id} failed: {e}\")\n", - " if result == 0:\n", - " logger.debug(f\"Deleting memory {memory_id} failed: memory not found\")\n", - " else:\n", - " logger.info(f\"Deleted memory {memory_id}\")\n", - "\n", - "\n", - "def summarize_memories(combined_content: str, memory_type: MemoryType) -> str:\n", - " \"\"\"Use the LLM to create a concise summary of similar memories\"\"\"\n", - " try:\n", - " system_prompt = f\"\"\"\n", - " You are a memory consolidation assistant. Your task is to create a single, \n", - " concise memory from these similar memory fragments. The new memory should\n", - " be a {memory_type.value} memory.\n", - " \n", - " Combine the information without repetition while preserving all important details.\n", - " \"\"\"\n", - "\n", - " messages = [\n", - " SystemMessage(content=system_prompt),\n", - " HumanMessage(\n", - " content=f\"Consolidate these similar memories into one:\\n\\n{combined_content}\"\n", - " ),\n", - " ]\n", - "\n", - " response = summarizer.invoke(messages)\n", - " return str(response.content)\n", - " except Exception as e:\n", - " logger.error(f\"Error summarizing memories: {e}\")\n", - " # Fall back to just using the combined content\n", - " return combined_content\n", - "\n", - "\n", - "def memory_consolidation_worker(user_id: str):\n", - " \"\"\"\n", - " Worker that periodically consolidates memories for the active user.\n", - "\n", - " NOTE: In production, this would probably use a background task framework, such\n", - " as rq or Celery, and run on a schedule.\n", - " \"\"\"\n", - " while True:\n", - " try:\n", - " consolidate_memories(user_id)\n", - " # Run every 10 minutes\n", - " time.sleep(10 * 60)\n", - " except Exception as e:\n", - " logger.exception(f\"Error in memory consolidation worker: {e}\")\n", - " # If there's an error, wait an hour and try again\n", - " time.sleep(60 * 60)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## The Main Loop\n", - "\n", - "Now we can put everything together and run the main loop.\n", - "\n", - "Running this cell should ask for your OpenAI and Tavily keys, then a username\n", - "and thread ID. You'll enter a loop in which you can enter queries and see\n", - "responses from the agent printed below the following cell." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import threading\n", - "\n", - "\n", - "def main(thread_id: str = \"book_flight\", user_id: str = \"demo_user\"):\n", - " \"\"\"Main interaction loop for the travel agent\"\"\"\n", - " print(\"Welcome to the Travel Assistant! (Type 'exit' to quit)\")\n", - "\n", - " config = RunnableConfig(configurable={\"thread_id\": thread_id, \"user_id\": user_id})\n", - " state = RuntimeState(messages=[])\n", - "\n", - " # If we're using the manual memory strategy, we need to create a queue for\n", - " # memory processing and start a worker thread. 
After every 'round' of a\n", - " # conversation, the main loop will add the current state and config to the\n", - " # queue for memory processing.\n", - " if memory_strategy == MemoryStrategy.MANUAL:\n", - " # Create a queue for memory processing\n", - " memory_queue = Queue()\n", - "\n", - " # Start a worker thread that will process memory extraction tasks\n", - " memory_thread = threading.Thread(\n", - " target=memory_worker, args=(memory_queue, user_id), daemon=True\n", - " )\n", - " memory_thread.start()\n", - "\n", - " # We always run consolidation in the background, regardless of memory strategy.\n", - " consolidation_thread = threading.Thread(\n", - " target=memory_consolidation_worker, args=(user_id,), daemon=True\n", - " )\n", - " consolidation_thread.start()\n", - "\n", - " while True:\n", - " user_input = input(\"\\nYou (type 'quit' to quit): \")\n", - "\n", - " if not user_input:\n", - " continue\n", - "\n", - " if user_input.lower() in [\"exit\", \"quit\"]:\n", - " print(\"Thank you for using the Travel Assistant. Goodbye!\")\n", - " break\n", - "\n", - " state[\"messages\"].append(HumanMessage(content=user_input))\n", - "\n", - " try:\n", - " # Process user input through the graph\n", - " for result in graph.stream(state, config=config, stream_mode=\"values\"):\n", - " state = RuntimeState(**result)\n", - "\n", - " logger.debug(f\"# of messages after run: {len(state['messages'])}\")\n", - "\n", - " # Find the most recent AI message, so we can print the response\n", - " ai_messages = [m for m in state[\"messages\"] if isinstance(m, AIMessage)]\n", - " if ai_messages:\n", - " message = ai_messages[-1].content\n", - " else:\n", - " logger.error(\"No AI messages after run\")\n", - " message = \"I'm sorry, I couldn't process your request properly.\"\n", - " # Add the error message to the state\n", - " state[\"messages\"].append(AIMessage(content=message))\n", - "\n", - " print(f\"\\nAssistant: {message}\")\n", - "\n", - " # Add the current state to the memory processing queue\n", - " if memory_strategy == MemoryStrategy.MANUAL:\n", - " memory_queue.put((state.copy(), config))\n", - "\n", - " except Exception as e:\n", - " logger.exception(f\"Error processing request: {e}\")\n", - " error_message = \"I'm sorry, I encountered an error processing your request.\"\n", - " print(f\"\\nAssistant: {error_message}\")\n", - " # Add the error message to the state\n", - " state[\"messages\"].append(AIMessage(content=error_message))\n", - "\n", - "\n", - "try:\n", - " user_id = input(\"Enter a user ID: \") or \"demo_user\"\n", - " thread_id = input(\"Enter a thread ID: \") or \"demo_thread\"\n", - "except Exception:\n", - " # If we're running in CI, we don't have a terminal to input from, so just exit\n", - " exit()\n", - "else:\n", - " main(thread_id, user_id)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## That's a Wrap!\n", - "\n", - "Want to make your own agent? Try the [LangGraph Quickstart](https://langchain-ai.github.io/langgraph/tutorials/introduction/). Then add our [Redis checkpointer](https://github.com/redis-developer/langgraph-redis) to give your agent fast, persistent memory!" 
- ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "env", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.11" - } + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![Redis](https://redis.io/wp-content/uploads/2024/04/Logotype.svg?auto=webp&quality=85,75&width=120)\n", + "\n", + "# [WIP] Agent Memory Using Agent Memory Server\n", + "This notebook demonstrates how to manage short-term and long-term agent memory using Redis and [Agent Memory Server](https://github.com/redis-developer/agent-memory-server). We'll explore:\n", + "\n", + "1. Working memory management\n", + "2. Long-term memory storage and retrieval\n", + "3. Managing memory manually vs. exposing tool access\n", + "4. Managing conversation history size with summarization\n", + "5. Consolidating similar/duplicate long-term memories\n", + "\n", + "## What We'll Build\n", + "\n", + "We're going to build two versions of a travel agent, one that manages long-term\n", + "memory manually and one that does so using tools the LLM calls.\n", + "\n", + "## Let's Begin!\n", + "\"Open" + ] }, - "nbformat": 4, - "nbformat_minor": 2 + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup\n", + "\n", + "### Packages" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install -q tavily-python pydantic agent-memory-server" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Required API Keys\n", + "\n", + "You must add an OpenAI API key with billing information for this lesson. You will also need\n", + "a Tavily API key. Tavily API keys come with free credits at the time of this writing." + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "# NBVAL_SKIP\n", + "import getpass\n", + "import os\n", + "\n", + "\n", + "def _set_env(key: str):\n", + " if key not in os.environ:\n", + " os.environ[key] = getpass.getpass(f\"{key}:\")\n", + "\n", + "\n", + "_set_env(\"OPENAI_API_KEY\")\n", + "\n", + "# Uncomment this if you have a Tavily API key and want to\n", + "# use the web search tool.\n", + "# _set_env(\"TAVILY_API_KEY\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Run redis\n", + "\n", + "### For colab\n", + "\n", + "Convert the following cell to Python to run it in Colab." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "%%sh\n", + "# Exit if this is not running in Colab\n", + "if [ -z \"$COLAB_RELEASE_TAG\" ]; then\n", + " exit 0\n", + "fi\n", + "\n", + "curl -fsSL https://packages.redis.io/gpg | sudo gpg --dearmor -o /usr/share/keyrings/redis-archive-keyring.gpg\n", + "echo \"deb [signed-by=/usr/share/keyrings/redis-archive-keyring.gpg] https://packages.redis.io/deb $(lsb_release -cs) main\" | sudo tee /etc/apt/sources.list.d/redis.list\n", + "sudo apt-get update > /dev/null 2>&1\n", + "sudo apt-get install redis-stack-server > /dev/null 2>&1\n", + "redis-stack-server --daemonize yes" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### For Alternative Environments\n", + "There are many ways to get the necessary redis-stack instance running\n", + "1. 
In the cloud: deploy a [free Redis Cloud instance](https://redis.com/try-free/). If you already run your\n",
+    "own Redis Enterprise deployment, that works too!\n",
+    "2. For a local install on your OS, [see the docs](https://redis.io/docs/latest/operate/oss_and_stack/install/install-stack/)\n",
+    "3. With Docker: `docker run -d --name redis -p 6379:6379 redis:8.1`\n",
+    "\n",
+    "## Test connection"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from redis import Redis\n",
+    "\n",
+    "\n",
+    "# Use the environment variable if set, otherwise default to localhost\n",
+    "REDIS_URL = os.getenv(\"REDIS_URL\", \"redis://localhost:6379\")\n",
+    "\n",
+    "redis_client = Redis.from_url(REDIS_URL)\n",
+    "redis_client.ping()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Short-Term vs. Long-Term Memory\n",
+    "\n",
+    "The agent uses **short-term memory** and **long-term memory**. The implementations\n",
+    "of short-term and long-term memory differ, as does how the agent uses them. Let's\n",
+    "dig into the details. We'll return to code soon!\n",
+    "\n",
+    "### Short-Term Memory\n",
+    "\n",
+    "For short-term memory, the agent keeps track of conversation history with Redis.\n",
+    "Because this is a LangGraph agent, we use the `RedisSaver` class to achieve\n",
+    "this. `RedisSaver` is what LangGraph refers to as a _checkpointer_. You can read\n",
+    "more about checkpointers in the [LangGraph\n",
+    "documentation](https://langchain-ai.github.io/langgraph/concepts/persistence/).\n",
+    "In short, they store state for each node in the graph, which for this agent\n",
+    "includes conversation history.\n",
+    "\n",
+    "Here's a diagram showing how the agent uses Redis for short-term memory. Each node\n",
+    "in the graph (Retrieve Memories, Respond, Summarize Conversation) persists its \"state\"\n",
+    "to Redis. The state object contains the agent's message conversation history for\n",
+    "the current thread.\n",
+    "\n",
+    "\n",
+    "\n",
+    "If Redis persistence is on, then Redis will persist short-term memory to\n",
+    "disk. This means if you quit the agent and return with the same thread ID and\n",
+    "user ID, you'll resume the same conversation.\n",
+    "\n",
+    "Conversation histories can grow long and pollute an LLM's context window. To manage\n",
+    "this, after every \"turn\" of a conversation, the agent summarizes messages when the\n",
+    "conversation grows past a configurable threshold. Checkpointers do not do this by\n",
+    "default, so we've created a node in the graph for summarization.\n",
+    "\n",
+    "**NOTE**: We'll see example code for the summarization node later in this notebook.\n",
+    "\n",
+    "### Long-Term Memory\n",
+    "\n",
+    "Aside from conversation history, the agent stores long-term memories in a search\n",
+    "index in Redis, using [RedisVL](https://docs.redisvl.com/en/latest/). Here's a\n",
+    "diagram showing the components involved:\n",
+    "\n",
+    "\n",
+    "\n",
+    "The agent tracks two types of long-term memories:\n",
+    "\n",
+    "- **Episodic**: User-specific experiences and preferences\n",
+    "- **Semantic**: General knowledge about travel destinations and requirements\n",
+    "\n",
+    "**NOTE**: If you're familiar with the [CoALA\n",
+    "paper](https://arxiv.org/abs/2309.02427), the terms \"episodic\" and \"semantic\"\n",
+    "here map to the same concepts in the paper. CoALA discusses a third type of\n",
+    "memory, _procedural_. 
In our example, we consider logic encoded in Python in the\n", + "agent codebase to be its procedural memory.\n", + "\n", + "### Representing Long-Term Memory in Python\n", + "We use a couple of Pydantic models to represent long-term memories, both before\n", + "and after they're stored in Redis:" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import datetime\n", + "from enum import Enum\n", + "\n", + "import ulid\n", + "from pydantic import BaseModel, Field\n", + "\n", + "\n", + "class MemoryType(str, Enum):\n", + " \"\"\"\n", + " The type of a long-term memory.\n", + "\n", + " EPISODIC: User specific experiences and preferences\n", + "\n", + " SEMANTIC: General knowledge on top of the user's preferences and LLM's\n", + " training data.\n", + " \"\"\"\n", + "\n", + " EPISODIC = \"episodic\"\n", + " SEMANTIC = \"semantic\"\n", + "\n", + "\n", + "class Memory(BaseModel):\n", + " \"\"\"Represents a single long-term memory.\"\"\"\n", + "\n", + " content: str\n", + " memory_type: MemoryType\n", + " metadata: str\n", + "\n", + "\n", + "class Memories(BaseModel):\n", + " \"\"\"\n", + " A list of memories extracted from a conversation by an LLM.\n", + "\n", + " NOTE: OpenAI's structured output requires us to wrap the list in an object.\n", + " \"\"\"\n", + "\n", + " memories: list[Memory]\n", + "\n", + "\n", + "class StoredMemory(Memory):\n", + " \"\"\"A stored long-term memory\"\"\"\n", + "\n", + " id: str # The redis key\n", + " memory_id: ulid.ULID = Field(default_factory=lambda: ulid.ULID())\n", + " created_at: datetime = Field(default_factory=datetime.now)\n", + " user_id: str | None = None\n", + " thread_id: str | None = None\n", + " memory_type: MemoryType | None = None\n", + "\n", + "\n", + "class MemoryStrategy(str, Enum):\n", + " \"\"\"\n", + " Supported strategies for managing long-term memory.\n", + "\n", + " This notebook supports two strategies for working with long-term memory:\n", + "\n", + " TOOLS: The LLM decides when to store and retrieve long-term memories, using\n", + " tools (AKA, function-calling) to do so.\n", + "\n", + " MANUAL: The agent manually retrieves long-term memories relevant to the\n", + " current conversation before sending every message and analyzes every\n", + " response to extract memories to store.\n", + "\n", + " NOTE: In both cases, the agent runs a background thread to consolidate\n", + " memories, and a workflow step to summarize conversations after the history\n", + " grows past a threshold.\n", + " \"\"\"\n", + "\n", + " TOOLS = \"tools\"\n", + " MANUAL = \"manual\"\n", + "\n", + "\n", + "# By default, we'll use the manual strategy\n", + "memory_strategy = MemoryStrategy.MANUAL" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We'll return to these models soon to see them in action!" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Short-Term Memory Storage and Retrieval\n", + "\n", + "The `RedisSaver` class handles the basics of short-term memory storage for us,\n", + "so we don't need to do anything here." 
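+    ,"\n",
+    "\n",
+    "For reference, here's a minimal sketch of the checkpointer wiring we'll run later in\n",
+    "this notebook when we create the agent (it assumes the `redis_client` we created above):\n",
+    "\n",
+    "```python\n",
+    "from langgraph.checkpoint.redis import RedisSaver\n",
+    "\n",
+    "# Short-term memory: LangGraph persists each thread's state (including messages)\n",
+    "# through this checkpointer.\n",
+    "redis_saver = RedisSaver(redis_client=redis_client)\n",
+    "redis_saver.setup()  # One-time setup of the structures the checkpointer needs\n",
+    "```"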
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Long-Term Memory Storage and Retrieval\n", + "\n", + "We use RedisVL to store and retrieve long-term memories with vector embeddings.\n", + "This allows for semantic search of past experiences and knowledge.\n", + "\n", + "Let's set up a new search index to store and query memories:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from redisvl.index import SearchIndex\n", + "from redisvl.schema.schema import IndexSchema\n", + "\n", + "\n", + "# Define schema for long-term memory index\n", + "memory_schema = IndexSchema.from_dict(\n", + " {\n", + " \"index\": {\n", + " \"name\": \"agent_memories\",\n", + " \"prefix\": \"memory:\",\n", + " \"key_separator\": \":\",\n", + " \"storage_type\": \"json\",\n", + " },\n", + " \"fields\": [\n", + " {\"name\": \"content\", \"type\": \"text\"},\n", + " {\"name\": \"memory_type\", \"type\": \"tag\"},\n", + " {\"name\": \"metadata\", \"type\": \"text\"},\n", + " {\"name\": \"created_at\", \"type\": \"text\"},\n", + " {\"name\": \"user_id\", \"type\": \"tag\"},\n", + " {\"name\": \"memory_id\", \"type\": \"tag\"},\n", + " {\n", + " \"name\": \"embedding\",\n", + " \"type\": \"vector\",\n", + " \"attrs\": {\n", + " \"algorithm\": \"flat\",\n", + " \"dims\": 1536, # OpenAI embedding dimension\n", + " \"distance_metric\": \"cosine\",\n", + " \"datatype\": \"float32\",\n", + " },\n", + " },\n", + " ],\n", + " }\n", + ")\n", + "\n", + "# Create search index\n", + "try:\n", + " long_term_memory_index = SearchIndex(\n", + " schema=memory_schema, redis_client=redis_client, overwrite=True\n", + " )\n", + " long_term_memory_index.create()\n", + " print(\"Long-term memory index ready\")\n", + "except Exception as e:\n", + " print(f\"Error creating index: {e}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Storage and Retrieval Functions\n", + "\n", + "Now that we have a search index in Redis, we can write functions to store and\n", + "retrieve memories. We can use RedisVL to write these.\n", + "\n", + "First, we'll write a utility function to check if a memory similar to a given\n", + "memory already exists in the index. 
Later, we can use this to avoid storing\n", + "duplicate memories.\n", + "\n", + "#### Checking for Similar Memories" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import logging\n", + "\n", + "from redisvl.query import VectorRangeQuery\n", + "from redisvl.query.filter import Tag\n", + "from redisvl.utils.vectorize.text.openai import OpenAITextVectorizer\n", + "\n", + "\n", + "logger = logging.getLogger(__name__)\n", + "\n", + "# If we have any memories that aren't associated with a user, we'll use this ID.\n", + "SYSTEM_USER_ID = \"system\"\n", + "\n", + "openai_embed = OpenAITextVectorizer(model=\"text-embedding-ada-002\")\n", + "\n", + "# Change this to MemoryStrategy.TOOLS to use function-calling to store and\n", + "# retrieve memories.\n", + "memory_strategy = MemoryStrategy.MANUAL\n", + "\n", + "\n", + "def similar_memory_exists(\n", + " content: str,\n", + " memory_type: MemoryType,\n", + " user_id: str = SYSTEM_USER_ID,\n", + " thread_id: str | None = None,\n", + " distance_threshold: float = 0.1,\n", + ") -> bool:\n", + " \"\"\"Check if a similar long-term memory already exists in Redis.\"\"\"\n", + " query_embedding = openai_embed.embed(content)\n", + " filters = (Tag(\"user_id\") == user_id) & (Tag(\"memory_type\") == memory_type)\n", + " if thread_id:\n", + " filters = filters & (Tag(\"thread_id\") == thread_id)\n", + "\n", + " # Search for similar memories\n", + " vector_query = VectorRangeQuery(\n", + " vector=query_embedding,\n", + " num_results=1,\n", + " vector_field_name=\"embedding\",\n", + " filter_expression=filters,\n", + " distance_threshold=distance_threshold,\n", + " return_fields=[\"id\"],\n", + " )\n", + " results = long_term_memory_index.query(vector_query)\n", + " logger.debug(f\"Similar memory search results: {results}\")\n", + "\n", + " if results:\n", + " logger.debug(\n", + " f\"{len(results)} similar {'memory' if results.count == 1 else 'memories'} found. First: \"\n", + " f\"{results[0]['id']}. 
Skipping storage.\"\n", + " )\n", + " return True\n", + "\n", + " return False" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Storing and Retrieving Long-Term Memories" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We'll use the `similar_memory_exists()` function when we store memories:" + ] + }, + { + "cell_type": "code", + "execution_count": 89, + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import datetime\n", + "\n", + "import ulid\n", + "\n", + "\n", + "def store_memory(\n", + " content: str,\n", + " memory_type: MemoryType,\n", + " user_id: str = SYSTEM_USER_ID,\n", + " thread_id: str | None = None,\n", + " metadata: str | None = None,\n", + "):\n", + " \"\"\"Store a long-term memory in Redis, avoiding duplicates.\"\"\"\n", + " if metadata is None:\n", + " metadata = \"{}\"\n", + "\n", + " logger.info(f\"Preparing to store memory: {content}\")\n", + "\n", + " if similar_memory_exists(content, memory_type, user_id, thread_id):\n", + " logger.info(\"Similar memory found, skipping storage\")\n", + " return\n", + "\n", + " embedding = openai_embed.embed(content)\n", + "\n", + " memory_data = {\n", + " \"user_id\": user_id or SYSTEM_USER_ID,\n", + " \"content\": content,\n", + " \"memory_type\": memory_type.value,\n", + " \"metadata\": metadata,\n", + " \"created_at\": datetime.now().isoformat(),\n", + " \"embedding\": embedding,\n", + " \"memory_id\": str(ulid.ULID()),\n", + " \"thread_id\": thread_id,\n", + " }\n", + "\n", + " try:\n", + " long_term_memory_index.load([memory_data])\n", + " except Exception as e:\n", + " logger.error(f\"Error storing memory: {e}\")\n", + " return\n", + "\n", + " logger.info(f\"Stored {memory_type} memory: {content}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And now that we're storing memories, we can retrieve them:" + ] + }, + { + "cell_type": "code", + "execution_count": 90, + "metadata": {}, + "outputs": [], + "source": [ + "def retrieve_memories(\n", + " query: str,\n", + " memory_type: MemoryType | None | list[MemoryType] = None,\n", + " user_id: str = SYSTEM_USER_ID,\n", + " thread_id: str | None = None,\n", + " distance_threshold: float = 0.1,\n", + " limit: int = 5,\n", + ") -> list[StoredMemory]:\n", + " \"\"\"Retrieve relevant memories from Redis\"\"\"\n", + " # Create vector query\n", + " logger.debug(f\"Retrieving memories for query: {query}\")\n", + " vector_query = VectorRangeQuery(\n", + " vector=openai_embed.embed(query),\n", + " return_fields=[\n", + " \"content\",\n", + " \"memory_type\",\n", + " \"metadata\",\n", + " \"created_at\",\n", + " \"memory_id\",\n", + " \"thread_id\",\n", + " \"user_id\",\n", + " ],\n", + " num_results=limit,\n", + " vector_field_name=\"embedding\",\n", + " dialect=2,\n", + " distance_threshold=distance_threshold,\n", + " )\n", + "\n", + " base_filters = [f\"@user_id:{{{user_id or SYSTEM_USER_ID}}}\"]\n", + "\n", + " if memory_type:\n", + " if isinstance(memory_type, list):\n", + " base_filters.append(f\"@memory_type:{{{'|'.join(memory_type)}}}\")\n", + " else:\n", + " base_filters.append(f\"@memory_type:{{{memory_type.value}}}\")\n", + "\n", + " if thread_id:\n", + " base_filters.append(f\"@thread_id:{{{thread_id}}}\")\n", + "\n", + " vector_query.set_filter(\" \".join(base_filters))\n", + "\n", + " # Execute search\n", + " results = long_term_memory_index.query(vector_query)\n", + "\n", + " # Parse results\n", + " memories = []\n", + " for doc in results:\n", + " try:\n", + " memory = 
StoredMemory(\n",
+    "                id=doc[\"id\"],\n",
+    "                memory_id=doc[\"memory_id\"],\n",
+    "                user_id=doc[\"user_id\"],\n",
+    "                thread_id=doc.get(\"thread_id\", None),\n",
+    "                memory_type=MemoryType(doc[\"memory_type\"]),\n",
+    "                content=doc[\"content\"],\n",
+    "                created_at=doc[\"created_at\"],\n",
+    "                metadata=doc[\"metadata\"],\n",
+    "            )\n",
+    "            memories.append(memory)\n",
+    "        except Exception as e:\n",
+    "            logger.error(f\"Error parsing memory: {e}\")\n",
+    "            continue\n",
+    "    return memories"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Managing Long-Term Memory Manually vs. Calling Tools\n",
+    "\n",
+    "While making LLM queries, agents can store and retrieve relevant long-term\n",
+    "memories in one of two ways (there are other approaches, but these are the two\n",
+    "we'll discuss):\n",
+    "\n",
+    "1. Expose memory retrieval and storage as \"tools\" that the LLM can decide to call contextually.\n",
+    "2. Manually augment prompts with relevant memories, and manually extract and store relevant memories.\n",
+    "\n",
+    "Both approaches have tradeoffs.\n",
+    "\n",
+    "**Tool-calling** leaves the decision to store a memory or find relevant memories\n",
+    "up to the LLM. This can add latency to requests. It generally results in fewer\n",
+    "calls to Redis, but it will sometimes fail to retrieve potentially relevant\n",
+    "context or to extract relevant memories from a conversation.\n",
+    "\n",
+    "**Manual memory management** results in more calls to Redis but fewer round-trip\n",
+    "LLM requests, reducing latency. Manual extraction also tends to capture more\n",
+    "memories than tool calls, which stores more data in Redis and adds more context\n",
+    "to LLM requests. More context means more contextual awareness, but also higher\n",
+    "token spend.\n",
+    "\n",
+    "You can test both approaches with this agent by changing the `memory_strategy`\n",
+    "variable.\n",
+    "\n",
+    "## Managing Memory Manually\n",
+    "\n",
+    "With the manual memory management strategy, we're going to extract memories after\n",
+    "every interaction between the user and the agent. 
We're then going to retrieve\n", + "those memories during future interactions before we send the query.\n", + "\n", + "### Extracting Memories\n", + "We'll call this `extract_memories` function manually after each interaction:" + ] + }, + { + "cell_type": "code", + "execution_count": 91, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_core.messages import HumanMessage\n", + "from langchain_core.runnables.config import RunnableConfig\n", + "from langchain_openai import ChatOpenAI\n", + "from langgraph.graph.message import MessagesState\n", + "\n", + "\n", + "class RuntimeState(MessagesState):\n", + " \"\"\"Agent state (just messages for now)\"\"\"\n", + "\n", + " pass\n", + "\n", + "\n", + "memory_llm = ChatOpenAI(model=\"gpt-4o\", temperature=0.3).with_structured_output(\n", + " Memories\n", + ")\n", + "\n", + "\n", + "def extract_memories(\n", + " last_processed_message_id: str | None,\n", + " state: RuntimeState,\n", + " config: RunnableConfig,\n", + ") -> str | None:\n", + " \"\"\"Extract and store memories in long-term memory\"\"\"\n", + " logger.debug(f\"Last message ID is: {last_processed_message_id}\")\n", + "\n", + " if len(state[\"messages\"]) < 3: # Need at least a user message and agent response\n", + " logger.debug(\"Not enough messages to extract memories\")\n", + " return last_processed_message_id\n", + "\n", + " user_id = config.get(\"configurable\", {}).get(\"user_id\", None)\n", + " if not user_id:\n", + " logger.warning(\"No user ID found in config when extracting memories\")\n", + " return last_processed_message_id\n", + "\n", + " # Get the messages\n", + " messages = state[\"messages\"]\n", + "\n", + " # Find the newest message ID (or None if no IDs)\n", + " newest_message_id = None\n", + " for msg in reversed(messages):\n", + " if hasattr(msg, \"id\") and msg.id:\n", + " newest_message_id = msg.id\n", + " break\n", + "\n", + " logger.debug(f\"Newest message ID is: {newest_message_id}\")\n", + "\n", + " # If we've already processed up to this message ID, skip\n", + " if (\n", + " last_processed_message_id\n", + " and newest_message_id\n", + " and last_processed_message_id == newest_message_id\n", + " ):\n", + " logger.debug(f\"Already processed messages up to ID {newest_message_id}\")\n", + " return last_processed_message_id\n", + "\n", + " # Find the index of the message with last_processed_message_id\n", + " start_index = 0\n", + " if last_processed_message_id:\n", + " for i, msg in enumerate(messages):\n", + " if hasattr(msg, \"id\") and msg.id == last_processed_message_id:\n", + " start_index = i + 1 # Start processing from the next message\n", + " break\n", + "\n", + " # Check if there are messages to process\n", + " if start_index >= len(messages):\n", + " logger.debug(\"No new messages to process since last processed message\")\n", + " return newest_message_id\n", + "\n", + " # Get only the messages after the last processed message\n", + " messages_to_process = messages[start_index:]\n", + "\n", + " # If there are not enough messages to process, include some context\n", + " if len(messages_to_process) < 3 and start_index > 0:\n", + " # Include up to 3 messages before the start_index for context\n", + " context_start = max(0, start_index - 3)\n", + " messages_to_process = messages[context_start:]\n", + "\n", + " # Format messages for the memory agent\n", + " message_history = \"\\n\".join(\n", + " [\n", + " f\"{'User' if isinstance(msg, HumanMessage) else 'Assistant'}: {msg.content}\"\n", + " for msg in messages_to_process\n", + " ]\n", + " 
)\n", + "\n", + " prompt = f\"\"\"\n", + " You are a long-memory manager. Your job is to analyze this message history\n", + " and extract information that might be useful in future conversations.\n", + " \n", + " Extract two types of memories:\n", + " 1. EPISODIC: Personal experiences and preferences specific to this user\n", + " Example: \"User prefers window seats\" or \"User had a bad experience in Paris\"\n", + " \n", + " 2. SEMANTIC: General facts and knowledge about travel that could be useful\n", + " Example: \"The best time to visit Japan is during cherry blossom season in April\"\n", + " \n", + " For each memory, provide:\n", + " - Type: The memory type (EPISODIC/SEMANTIC)\n", + " - Content: The actual information to store\n", + " - Metadata: Relevant tags and context (as JSON)\n", + " \n", + " IMPORTANT RULES:\n", + " 1. Only extract information that would be genuinely useful for future interactions.\n", + " 2. Do not extract procedural knowledge - that is handled by the system's built-in tools and prompts.\n", + " 3. You are a large language model, not a human - do not extract facts that you already know.\n", + " \n", + " Message history:\n", + " {message_history}\n", + " \n", + " Extracted memories:\n", + " \"\"\"\n", + "\n", + " memories_to_store: Memories = memory_llm.invoke([HumanMessage(content=prompt)]) # type: ignore\n", + "\n", + " # Store each extracted memory\n", + " for memory_data in memories_to_store.memories:\n", + " store_memory(\n", + " content=memory_data.content,\n", + " memory_type=memory_data.memory_type,\n", + " user_id=user_id,\n", + " metadata=memory_data.metadata,\n", + " )\n", + "\n", + " # Return data with the newest processed message ID\n", + " return newest_message_id" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We'll use this function in a background thread. We'll start the thread in manual\n", + "memory mode but not in tool mode, and we'll run it as a worker that pulls\n", + "message histories from a `Queue` to process:" + ] + }, + { + "cell_type": "code", + "execution_count": 92, + "metadata": {}, + "outputs": [], + "source": [ + "import time\n", + "from queue import Queue\n", + "\n", + "\n", + "DEFAULT_MEMORY_WORKER_INTERVAL = 5 * 60 # 5 minutes\n", + "DEFAULT_MEMORY_WORKER_BACKOFF_INTERVAL = 10 * 60 # 10 minutes\n", + "\n", + "\n", + "def memory_worker(\n", + " memory_queue: Queue,\n", + " user_id: str,\n", + " interval: int = DEFAULT_MEMORY_WORKER_INTERVAL,\n", + " backoff_interval: int = DEFAULT_MEMORY_WORKER_BACKOFF_INTERVAL,\n", + "):\n", + " \"\"\"Worker function that processes long-term memory extraction requests\"\"\"\n", + " key = f\"memory_worker:{user_id}:last_processed_message_id\"\n", + "\n", + " last_processed_message_id = redis_client.get(key)\n", + " logger.debug(f\"Last processed message ID: {last_processed_message_id}\")\n", + " last_processed_message_id = (\n", + " str(last_processed_message_id) if last_processed_message_id else None\n", + " )\n", + "\n", + " while True:\n", + " try:\n", + " # Get the next state and config from the queue (blocks until an item is available)\n", + " state, config = memory_queue.get()\n", + "\n", + " # Extract long-term memories from the conversation history\n", + " last_processed_message_id = extract_memories(\n", + " last_processed_message_id, state, config\n", + " )\n", + " logger.debug(\n", + " f\"Memory worker extracted memories. 
Last processed message ID: {last_processed_message_id}\"\n", + " )\n", + "\n", + " if last_processed_message_id:\n", + " logger.debug(\n", + " f\"Setting last processed message ID: {last_processed_message_id}\"\n", + " )\n", + " redis_client.set(key, last_processed_message_id)\n", + "\n", + " # Mark the task as done\n", + " memory_queue.task_done()\n", + " logger.debug(\"Memory extraction completed for queue item\")\n", + " # Wait before processing next item\n", + " time.sleep(interval)\n", + " except Exception as e:\n", + " # Wait before processing next item after an error\n", + " logger.exception(f\"Error in memory worker thread: {e}\")\n", + " time.sleep(backoff_interval)\n", + "\n", + "\n", + "# NOTE: We'll actually start the worker thread later, in the main loop." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Augmenting Queries with Relevant Memories\n", + "\n", + "For every user interaction with the agent, we'll query for relevant memories and\n", + "add them to the LLM prompt with `retrieve_relevant_memories()`.\n", + "\n", + "**NOTE:** We only run this node in the \"manual\" memory management strategy. If\n", + "using \"tools,\" the LLM will decide when to retrieve memories." + ] + }, + { + "cell_type": "code", + "execution_count": 93, + "metadata": {}, + "outputs": [], + "source": [ + "def retrieve_relevant_memories(\n", + " state: RuntimeState, config: RunnableConfig\n", + ") -> RuntimeState:\n", + " \"\"\"Retrieve relevant memories based on the current conversation.\"\"\"\n", + " if not state[\"messages\"]:\n", + " logger.debug(\"No messages in state\")\n", + " return state\n", + "\n", + " latest_message = state[\"messages\"][-1]\n", + " if not isinstance(latest_message, HumanMessage):\n", + " logger.debug(\"Latest message is not a HumanMessage: \", latest_message)\n", + " return state\n", + "\n", + " user_id = config.get(\"configurable\", {}).get(\"user_id\", SYSTEM_USER_ID)\n", + "\n", + " query = str(latest_message.content)\n", + " relevant_memories = retrieve_memories(\n", + " query=query,\n", + " memory_type=[MemoryType.EPISODIC, MemoryType.SEMANTIC],\n", + " limit=5,\n", + " user_id=user_id,\n", + " distance_threshold=0.3,\n", + " )\n", + "\n", + " logger.debug(f\"All relevant memories: {relevant_memories}\")\n", + "\n", + " # We'll augment the latest human message with the relevant memories.\n", + " if relevant_memories:\n", + " memory_context = \"\\n\\n### Relevant memories from previous conversations:\\n\"\n", + "\n", + " # Group by memory type\n", + " memory_types = {\n", + " MemoryType.EPISODIC: \"User Preferences & History\",\n", + " MemoryType.SEMANTIC: \"Travel Knowledge\",\n", + " }\n", + "\n", + " for mem_type, type_label in memory_types.items():\n", + " memories_of_type = [\n", + " m for m in relevant_memories if m.memory_type == mem_type\n", + " ]\n", + " if memories_of_type:\n", + " memory_context += f\"\\n**{type_label}**:\\n\"\n", + " for mem in memories_of_type:\n", + " memory_context += f\"- {mem.content}\\n\"\n", + "\n", + " augmented_message = HumanMessage(content=f\"{query}\\n{memory_context}\")\n", + " state[\"messages\"][-1] = augmented_message\n", + "\n", + " logger.debug(f\"Augmented message: {augmented_message.content}\")\n", + "\n", + " return state.copy()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is the first function we've seen that represents a **node** in the LangGraph\n", + "graph we'll build. 
As a node representation, this function receives a `state`\n", + "object containing the runtime state of the graph, which is where conversation\n", + "history resides. Its `config` parameter contains data like the user and thread\n", + "IDs.\n", + "\n", + "This will be the starting node in the graph we'll assemble later. When a user\n", + "invokes the graph with a message, the first thing we'll do (when using the\n", + "\"manual\" memory strategy) is augment that message with potentially related\n", + "memories." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Defining Tools\n", + "\n", + "Now that we have our storage functions defined, we can create **tools**. We'll\n", + "need these to set up our agent in a moment. These tools will only be used when\n", + "the agent is operating in \"tools\" memory management mode." + ] + }, + { + "cell_type": "code", + "execution_count": 94, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "from langchain_core.tools import tool\n", + "\n", + "\n", + "@tool\n", + "def store_memory_tool(\n", + " content: str,\n", + " memory_type: MemoryType,\n", + " metadata: dict[str, str] | None = None,\n", + " config: RunnableConfig | None = None,\n", + ") -> str:\n", + " \"\"\"\n", + " Store a long-term memory in the system.\n", + "\n", + " Use this tool to save important information about user preferences,\n", + " experiences, or general knowledge that might be useful in future\n", + " interactions.\n", + " \"\"\"\n", + " config = config or RunnableConfig()\n", + " user_id = config.get(\"user_id\", SYSTEM_USER_ID)\n", + " thread_id = config.get(\"thread_id\")\n", + "\n", + " try:\n", + " # Store in long-term memory\n", + " store_memory(\n", + " content=content,\n", + " memory_type=memory_type,\n", + " user_id=user_id,\n", + " thread_id=thread_id,\n", + " metadata=str(metadata) if metadata else None,\n", + " )\n", + "\n", + " return f\"Successfully stored {memory_type} memory: {content}\"\n", + " except Exception as e:\n", + " return f\"Error storing memory: {str(e)}\"\n", + "\n", + "\n", + "@tool\n", + "def retrieve_memories_tool(\n", + " query: str,\n", + " memory_type: list[MemoryType],\n", + " limit: int = 5,\n", + " config: RunnableConfig | None = None,\n", + ") -> str:\n", + " \"\"\"\n", + " Retrieve long-term memories relevant to the query.\n", + "\n", + " Use this tool to access previously stored information about user\n", + " preferences, experiences, or general knowledge.\n", + " \"\"\"\n", + " config = config or RunnableConfig()\n", + " user_id = config.get(\"user_id\", SYSTEM_USER_ID)\n", + "\n", + " try:\n", + " # Get long-term memories\n", + " stored_memories = retrieve_memories(\n", + " query=query,\n", + " memory_type=memory_type,\n", + " user_id=user_id,\n", + " limit=limit,\n", + " distance_threshold=0.3,\n", + " )\n", + "\n", + " # Format the response\n", + " response = []\n", + "\n", + " if stored_memories:\n", + " response.append(\"Long-term memories:\")\n", + " for memory in stored_memories:\n", + " response.append(f\"- [{memory.memory_type}] {memory.content}\")\n", + "\n", + " return \"\\n\".join(response) if response else \"No relevant memories found.\"\n", + "\n", + " except Exception as e:\n", + " return f\"Error retrieving memories: {str(e)}\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Creating the Agent\n", + "\n", + "Because we're using different LLM objects configured for different purposes and\n", + "a prebuilt ReAct agent, we need a node that invokes the agent 
and returns the\n", + "response. But before we can invoke the agent, we need to set it up. This will\n", + "involve defining the tools the agent will need." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "\n", + "from langchain_community.tools.tavily_search import TavilySearchResults\n", + "from langchain_core.callbacks.manager import CallbackManagerForToolRun\n", + "from langchain_core.messages import AIMessage, AIMessageChunk, SystemMessage\n", + "from langgraph.checkpoint.redis import RedisSaver\n", + "from langgraph.prebuilt.chat_agent_executor import create_react_agent\n", + "\n", + "\n", + "class CachingTavilySearchResults(TavilySearchResults):\n", + " \"\"\"\n", + " An interface to Tavily search that caches results in Redis.\n", + "\n", + " Caching the results of the web search allows us to avoid rate limiting,\n", + " improve latency, and reduce costs.\n", + " \"\"\"\n", + "\n", + " def _run(\n", + " self,\n", + " query: str,\n", + " run_manager: CallbackManagerForToolRun | None = None,\n", + " ) -> tuple[list[dict[str, str]] | str, dict]:\n", + " \"\"\"Use the tool.\"\"\"\n", + " cache_key = f\"tavily_search:{query}\"\n", + " cached_result: str | None = redis_client.get(cache_key) # type: ignore\n", + " if cached_result:\n", + " return json.loads(cached_result), {}\n", + " result, raw_results = super()._run(query, run_manager)\n", + " redis_client.set(cache_key, json.dumps(result), ex=60 * 60)\n", + " return result, raw_results\n", + "\n", + "\n", + "# Create a checkpoint saver for short-term memory. This keeps track of the\n", + "# conversation history for each thread. Later, we'll continually summarize the\n", + "# conversation history to keep the context window manageable, while we also\n", + "# extract long-term memories from the conversation history to store in the\n", + "# long-term memory index.\n", + "redis_saver = RedisSaver(redis_client=redis_client)\n", + "redis_saver.setup()\n", + "\n", + "# Configure an LLM for the agent with a more creative temperature.\n", + "llm = ChatOpenAI(model=\"gpt-4o\", temperature=0.7)\n", + "\n", + "\n", + "# Uncomment these lines if you have a Tavily API key and want to use the web\n", + "# search tool. The agent is much more useful with this tool.\n", + "# web_search_tool = CachingTavilySearchResults(max_results=2)\n", + "# base_tools = [web_search_tool]\n", + "base_tools = []\n", + "\n", + "if memory_strategy == MemoryStrategy.TOOLS:\n", + " tools = base_tools + [store_memory_tool, retrieve_memories_tool]\n", + "elif memory_strategy == MemoryStrategy.MANUAL:\n", + " tools = base_tools\n", + "\n", + "\n", + "travel_agent = create_react_agent(\n", + " model=llm,\n", + " tools=tools,\n", + " checkpointer=redis_saver, # Short-term memory: the conversation history\n", + " prompt=SystemMessage(\n", + " content=\"\"\"\n", + " You are a travel assistant helping users plan their trips. You remember user preferences\n", + " and provide personalized recommendations based on past interactions.\n", + " \n", + " You have access to the following types of memory:\n", + " 1. Short-term memory: The current conversation thread\n", + " 2. Long-term memory: \n", + " - Episodic: User preferences and past trip experiences (e.g., \"User prefers window seats\")\n", + " - Semantic: General knowledge about travel destinations and requirements\n", + " \n", + " Your procedural knowledge (how to search, book flights, etc.) 
is built into your tools and prompts.\n", + " \n", + " Always be helpful, personal, and context-aware in your responses.\n", + " \"\"\"\n", + " ),\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Responding to the User\n", + "\n", + "Now we can write our node that invokes the agent and responds to the user:" + ] + }, + { + "cell_type": "code", + "execution_count": 96, + "metadata": {}, + "outputs": [], + "source": [ + "def respond_to_user(state: RuntimeState, config: RunnableConfig) -> RuntimeState:\n", + " \"\"\"Invoke the travel agent to generate a response.\"\"\"\n", + " human_messages = [m for m in state[\"messages\"] if isinstance(m, HumanMessage)]\n", + " if not human_messages:\n", + " logger.warning(\"No HumanMessage found in state\")\n", + " return state\n", + "\n", + " try:\n", + " for result in travel_agent.stream(\n", + " {\"messages\": state[\"messages\"]}, config=config, stream_mode=\"messages\"\n", + " ):\n", + " result_messages = result.get(\"messages\", [])\n", + "\n", + " ai_messages = [\n", + " m\n", + " for m in result_messages\n", + " if isinstance(m, AIMessage) or isinstance(m, AIMessageChunk)\n", + " ]\n", + " if ai_messages:\n", + " agent_response = ai_messages[-1]\n", + " # Append only the agent's response to the original state\n", + " state[\"messages\"].append(agent_response)\n", + "\n", + " except Exception as e:\n", + " logger.error(f\"Error invoking travel agent: {e}\")\n", + " agent_response = AIMessage(\n", + " content=\"I'm sorry, I encountered an error processing your request.\"\n", + " )\n", + " return state" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Summarizing Conversation History\n", + "\n", + "We've been focusing on long-term memory, but let's bounce back to short-term\n", + "memory for a moment. With `RedisSaver`, LangGraph will manage our message\n", + "history automatically. Still, the message history will continue to grow\n", + "indefinitely, until it overwhelms the LLM's token context window.\n", + "\n", + "To solve this problem, we'll add a node to the graph that summarizes the\n", + "conversation if it's grown past a threshold." + ] + }, + { + "cell_type": "code", + "execution_count": 97, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_core.messages import RemoveMessage\n", + "\n", + "\n", + "# An LLM configured for summarization.\n", + "summarizer = ChatOpenAI(model=\"gpt-4o\", temperature=0.3)\n", + "\n", + "# The number of messages after which we'll summarize the conversation.\n", + "MESSAGE_SUMMARIZATION_THRESHOLD = 10\n", + "\n", + "\n", + "def summarize_conversation(\n", + " state: RuntimeState, config: RunnableConfig\n", + ") -> RuntimeState | None:\n", + " \"\"\"\n", + " Summarize a list of messages into a concise summary to reduce context length\n", + " while preserving important information.\n", + " \"\"\"\n", + " messages = state[\"messages\"]\n", + " current_message_count = len(messages)\n", + " if current_message_count < MESSAGE_SUMMARIZATION_THRESHOLD:\n", + " logger.debug(f\"Not summarizing conversation: {current_message_count}\")\n", + " return state\n", + "\n", + " system_prompt = \"\"\"\n", + " You are a conversation summarizer. Create a concise summary of the previous\n", + " conversation between a user and a travel assistant.\n", + " \n", + " The summary should:\n", + " 1. Highlight key topics, preferences, and decisions\n", + " 2. Include any specific trip details (destinations, dates, preferences)\n", + " 3. 
Note any outstanding questions or topics that need follow-up\n", + " 4. Be concise but informative\n", + " \n", + " Format your summary as a brief narrative paragraph.\n", + " \"\"\"\n", + "\n", + " message_content = \"\\n\".join(\n", + " [\n", + " f\"{'User' if isinstance(msg, HumanMessage) else 'Assistant'}: {msg.content}\"\n", + " for msg in messages\n", + " ]\n", + " )\n", + "\n", + " # Invoke the summarizer\n", + " summary_messages = [\n", + " SystemMessage(content=system_prompt),\n", + " HumanMessage(\n", + " content=f\"Please summarize this conversation:\\n\\n{message_content}\"\n", + " ),\n", + " ]\n", + "\n", + " summary_response = summarizer.invoke(summary_messages)\n", + "\n", + " logger.info(f\"Summarized {len(messages)} messages into a conversation summary\")\n", + "\n", + " summary_message = SystemMessage(\n", + " content=f\"\"\"\n", + " Summary of the conversation so far:\n", + " \n", + " {summary_response.content}\n", + " \n", + " Please continue the conversation based on this summary and the recent messages.\n", + " \"\"\"\n", + " )\n", + " remove_messages = [\n", + " RemoveMessage(id=msg.id) for msg in messages if msg.id is not None\n", + " ]\n", + "\n", + " state[\"messages\"] = [ # type: ignore\n", + " *remove_messages,\n", + " summary_message,\n", + " state[\"messages\"][-1],\n", + " ]\n", + "\n", + " return state.copy()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Assembling the Graph\n", + "\n", + "It's time to assemble our graph!" + ] + }, + { + "cell_type": "code", + "execution_count": 98, + "metadata": {}, + "outputs": [], + "source": [ + "from langgraph.graph import END, START, StateGraph\n", + "\n", + "\n", + "workflow = StateGraph(RuntimeState)\n", + "\n", + "workflow.add_node(\"respond\", respond_to_user)\n", + "workflow.add_node(\"summarize_conversation\", summarize_conversation)\n", + "\n", + "if memory_strategy == MemoryStrategy.MANUAL:\n", + " # In manual memory mode, we'll retrieve relevant memories before\n", + " # responding to the user, and then augment the user's message with the\n", + " # relevant memories.\n", + " workflow.add_node(\"retrieve_memories\", retrieve_relevant_memories)\n", + " workflow.add_edge(START, \"retrieve_memories\")\n", + " workflow.add_edge(\"retrieve_memories\", \"respond\")\n", + "else:\n", + " # In tool-calling mode, we'll respond to the user and let the LLM\n", + " # decide when to retrieve and store memories, using tool calls.\n", + " workflow.add_edge(START, \"respond\")\n", + "\n", + "# Regardless of memory strategy, we'll summarize the conversation after\n", + "# responding to the user, to keep the context window manageable.\n", + "workflow.add_edge(\"respond\", \"summarize_conversation\")\n", + "workflow.add_edge(\"summarize_conversation\", END)\n", + "\n", + "# Finally, compile the graph.\n", + "graph = workflow.compile(checkpointer=redis_saver)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Consolidating Memories in a Background Thread\n", + "\n", + "We're almost ready to create the main loop that runs our graph. First, though,\n", + "let's create a worker that consolidates similar memories on a regular schedule,\n", + "using semantic search. We'll run the worker in a background thread later, in the\n", + "main loop." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 99, + "metadata": {}, + "outputs": [], + "source": [ + "from redisvl.query import FilterQuery\n", + "\n", + "\n", + "def consolidate_memories(user_id: str, batch_size: int = 10):\n", + " \"\"\"\n", + " Periodically merge similar long-term memories for a user.\n", + " \"\"\"\n", + " logger.info(f\"Starting memory consolidation for user {user_id}\")\n", + "\n", + " # For each memory type, consolidate separately\n", + "\n", + " for memory_type in MemoryType:\n", + " all_memories = []\n", + "\n", + " # Get all memories of this type for the user\n", + " of_type_for_user = (Tag(\"user_id\") == user_id) & (\n", + " Tag(\"memory_type\") == memory_type\n", + " )\n", + " filter_query = FilterQuery(filter_expression=of_type_for_user)\n", + "\n", + " for batch in long_term_memory_index.paginate(\n", + " filter_query, page_size=batch_size\n", + " ):\n", + " all_memories.extend(batch)\n", + "\n", + " all_memories = long_term_memory_index.query(filter_query)\n", + " if not all_memories:\n", + " continue\n", + "\n", + " # Group similar memories\n", + " processed_ids = set()\n", + " for memory in all_memories:\n", + " if memory[\"id\"] in processed_ids:\n", + " continue\n", + "\n", + " memory_embedding = memory[\"embedding\"]\n", + " vector_query = VectorRangeQuery(\n", + " vector=memory_embedding,\n", + " num_results=10,\n", + " vector_field_name=\"embedding\",\n", + " filter_expression=of_type_for_user\n", + " & (Tag(\"memory_id\") != memory[\"memory_id\"]),\n", + " distance_threshold=0.1,\n", + " return_fields=[\n", + " \"content\",\n", + " \"metadata\",\n", + " ],\n", + " )\n", + " similar_memories = long_term_memory_index.query(vector_query)\n", + "\n", + " # If we found similar memories, consolidate them\n", + " if similar_memories:\n", + " combined_content = memory[\"content\"]\n", + " combined_metadata = memory[\"metadata\"]\n", + "\n", + " if combined_metadata:\n", + " try:\n", + " combined_metadata = json.loads(combined_metadata)\n", + " except Exception as e:\n", + " logger.error(f\"Error parsing metadata: {e}\")\n", + " combined_metadata = {}\n", + "\n", + " for similar in similar_memories:\n", + " # Merge the content of similar memories\n", + " combined_content += f\" {similar['content']}\"\n", + "\n", + " if similar[\"metadata\"]:\n", + " try:\n", + " similar_metadata = json.loads(similar[\"metadata\"])\n", + " except Exception as e:\n", + " logger.error(f\"Error parsing metadata: {e}\")\n", + " similar_metadata = {}\n", + "\n", + " combined_metadata = {**combined_metadata, **similar_metadata}\n", + "\n", + " # Create a consolidated memory\n", + " new_metadata = {\n", + " \"consolidated\": True,\n", + " \"source_count\": len(similar_memories) + 1,\n", + " **combined_metadata,\n", + " }\n", + " consolidated_memory = {\n", + " \"content\": summarize_memories(combined_content, memory_type),\n", + " \"memory_type\": memory_type.value,\n", + " \"metadata\": json.dumps(new_metadata),\n", + " \"user_id\": user_id,\n", + " }\n", + "\n", + " # Delete the old memories\n", + " delete_memory(memory[\"id\"])\n", + " for similar in similar_memories:\n", + " delete_memory(similar[\"id\"])\n", + "\n", + " # Store the new consolidated memory\n", + " store_memory(\n", + " content=consolidated_memory[\"content\"],\n", + " memory_type=memory_type,\n", + " user_id=user_id,\n", + " metadata=consolidated_memory[\"metadata\"],\n", + " )\n", + "\n", + " logger.info(\n", + " f\"Consolidated {len(similar_memories) + 1} memories into one\"\n", + " )\n", + "\n", + 
"\n", + "def delete_memory(memory_id: str):\n", + " \"\"\"Delete a memory from Redis\"\"\"\n", + " try:\n", + " result = long_term_memory_index.drop_keys([memory_id])\n", + " except Exception as e:\n", + " logger.error(f\"Deleting memory {memory_id} failed: {e}\")\n", + " if result == 0:\n", + " logger.debug(f\"Deleting memory {memory_id} failed: memory not found\")\n", + " else:\n", + " logger.info(f\"Deleted memory {memory_id}\")\n", + "\n", + "\n", + "def summarize_memories(combined_content: str, memory_type: MemoryType) -> str:\n", + " \"\"\"Use the LLM to create a concise summary of similar memories\"\"\"\n", + " try:\n", + " system_prompt = f\"\"\"\n", + " You are a memory consolidation assistant. Your task is to create a single, \n", + " concise memory from these similar memory fragments. The new memory should\n", + " be a {memory_type.value} memory.\n", + " \n", + " Combine the information without repetition while preserving all important details.\n", + " \"\"\"\n", + "\n", + " messages = [\n", + " SystemMessage(content=system_prompt),\n", + " HumanMessage(\n", + " content=f\"Consolidate these similar memories into one:\\n\\n{combined_content}\"\n", + " ),\n", + " ]\n", + "\n", + " response = summarizer.invoke(messages)\n", + " return str(response.content)\n", + " except Exception as e:\n", + " logger.error(f\"Error summarizing memories: {e}\")\n", + " # Fall back to just using the combined content\n", + " return combined_content\n", + "\n", + "\n", + "def memory_consolidation_worker(user_id: str):\n", + " \"\"\"\n", + " Worker that periodically consolidates memories for the active user.\n", + "\n", + " NOTE: In production, this would probably use a background task framework, such\n", + " as rq or Celery, and run on a schedule.\n", + " \"\"\"\n", + " while True:\n", + " try:\n", + " consolidate_memories(user_id)\n", + " # Run every 10 minutes\n", + " time.sleep(10 * 60)\n", + " except Exception as e:\n", + " logger.exception(f\"Error in memory consolidation worker: {e}\")\n", + " # If there's an error, wait an hour and try again\n", + " time.sleep(60 * 60)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## The Main Loop\n", + "\n", + "Now we can put everything together and run the main loop.\n", + "\n", + "Running this cell should ask for your OpenAI and Tavily keys, then a username\n", + "and thread ID. You'll enter a loop in which you can enter queries and see\n", + "responses from the agent printed below the following cell." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import threading\n", + "\n", + "\n", + "def main(thread_id: str = \"book_flight\", user_id: str = \"demo_user\"):\n", + " \"\"\"Main interaction loop for the travel agent\"\"\"\n", + " print(\"Welcome to the Travel Assistant! (Type 'exit' to quit)\")\n", + "\n", + " config = RunnableConfig(configurable={\"thread_id\": thread_id, \"user_id\": user_id})\n", + " state = RuntimeState(messages=[])\n", + "\n", + " # If we're using the manual memory strategy, we need to create a queue for\n", + " # memory processing and start a worker thread. 
After every 'round' of a\n", + " # conversation, the main loop will add the current state and config to the\n", + " # queue for memory processing.\n", + " if memory_strategy == MemoryStrategy.MANUAL:\n", + " # Create a queue for memory processing\n", + " memory_queue = Queue()\n", + "\n", + " # Start a worker thread that will process memory extraction tasks\n", + " memory_thread = threading.Thread(\n", + " target=memory_worker, args=(memory_queue, user_id), daemon=True\n", + " )\n", + " memory_thread.start()\n", + "\n", + " # We always run consolidation in the background, regardless of memory strategy.\n", + " consolidation_thread = threading.Thread(\n", + " target=memory_consolidation_worker, args=(user_id,), daemon=True\n", + " )\n", + " consolidation_thread.start()\n", + "\n", + " while True:\n", + " user_input = input(\"\\nYou (type 'quit' to quit): \")\n", + "\n", + " if not user_input:\n", + " continue\n", + "\n", + " if user_input.lower() in [\"exit\", \"quit\"]:\n", + " print(\"Thank you for using the Travel Assistant. Goodbye!\")\n", + " break\n", + "\n", + " state[\"messages\"].append(HumanMessage(content=user_input))\n", + "\n", + " try:\n", + " # Process user input through the graph\n", + " for result in graph.stream(state, config=config, stream_mode=\"values\"):\n", + " state = RuntimeState(**result)\n", + "\n", + " logger.debug(f\"# of messages after run: {len(state['messages'])}\")\n", + "\n", + " # Find the most recent AI message, so we can print the response\n", + " ai_messages = [m for m in state[\"messages\"] if isinstance(m, AIMessage)]\n", + " if ai_messages:\n", + " message = ai_messages[-1].content\n", + " else:\n", + " logger.error(\"No AI messages after run\")\n", + " message = \"I'm sorry, I couldn't process your request properly.\"\n", + " # Add the error message to the state\n", + " state[\"messages\"].append(AIMessage(content=message))\n", + "\n", + " print(f\"\\nAssistant: {message}\")\n", + "\n", + " # Add the current state to the memory processing queue\n", + " if memory_strategy == MemoryStrategy.MANUAL:\n", + " memory_queue.put((state.copy(), config))\n", + "\n", + " except Exception as e:\n", + " logger.exception(f\"Error processing request: {e}\")\n", + " error_message = \"I'm sorry, I encountered an error processing your request.\"\n", + " print(f\"\\nAssistant: {error_message}\")\n", + " # Add the error message to the state\n", + " state[\"messages\"].append(AIMessage(content=error_message))\n", + "\n", + "\n", + "try:\n", + " user_id = input(\"Enter a user ID: \") or \"demo_user\"\n", + " thread_id = input(\"Enter a thread ID: \") or \"demo_thread\"\n", + "except Exception:\n", + " # If we're running in CI, we don't have a terminal to input from, so just exit\n", + " exit()\n", + "else:\n", + " main(thread_id, user_id)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## That's a Wrap!\n", + "\n", + "Want to make your own agent? Try the [LangGraph Quickstart](https://langchain-ai.github.io/langgraph/tutorials/introduction/). Then add our [Redis checkpointer](https://github.com/redis-developer/langgraph-redis) to give your agent fast, persistent memory!" 
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "env", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 } diff --git a/examples/travel_agent.py b/examples/travel_agent.py new file mode 100644 index 0000000..1b418b6 --- /dev/null +++ b/examples/travel_agent.py @@ -0,0 +1,535 @@ +#!/usr/bin/env python3 +""" +Travel Agent using Agent Memory Server with Web Search + +This script demonstrates how to manage both short-term and long-term agent memory +using the Agent Memory Server with optional web search capabilities. The agent manages: + +1. Working memory via the memory server (session messages and data) +2. Long-term memory storage and retrieval via the memory server +3. Memory extraction and contextual retrieval +4. Conversation flow without LangGraph dependencies +5. Optional cached web search using Tavily API with Redis caching +6. Automatic discovery and use of all available memory client tools + +Web search features: +- Cached web search results using Redis to avoid rate limiting +- Function calling integration with OpenAI for intelligent search decisions +- Automatic fallback when Tavily API key or Redis are not available + +Environment variables: +- OPENAI_API_KEY: Required for OpenAI ChatGPT +- TAVILY_API_KEY: Optional for web search functionality +- MEMORY_SERVER_URL: Memory server URL (default: http://localhost:8000) +- REDIS_URL: Redis URL for caching (default: redis://localhost:6379) +""" + +import asyncio +import json +import logging +import os +import textwrap + +from agent_memory_client import ( + MemoryAPIClient, + create_memory_client, +) +from agent_memory_client.models import ( + WorkingMemory, +) +from langchain_community.tools.tavily_search import TavilySearchResults +from langchain_core.callbacks.manager import CallbackManagerForToolRun +from langchain_openai import ChatOpenAI +from redis import Redis + + +# Configure logging +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +# Environment setup +MEMORY_SERVER_URL = os.getenv("MEMORY_SERVER_URL", "http://localhost:8000") +REDIS_URL = os.getenv("REDIS_URL", "redis://localhost:6379") +DEFAULT_USER = "demo_user" + +MAX_WEB_SEARCH_RESULTS = 3 + +SYSTEM_PROMPT = { + "role": "system", + "content": textwrap.dedent(""" + You are a helpful travel assistant. You can help with travel-related questions. + You have access to conversation history and memory management tools to provide + personalized responses. + + Available tools: + + 1. **web_search** (if available): Search for current travel information, weather, + events, or other up-to-date data when specifically needed. + + 2. 
**Memory Management Tools** (always available): + - **search_memory**: Look up previous conversations and stored information + - **get_working_memory**: Check current session context + - **add_memory_to_working_memory**: Store important preferences or information + - **update_working_memory_data**: Save session-specific data + + **Guidelines**: + - Answer the user's actual question first and directly + - When someone shares information (like "I like X"), simply acknowledge it naturally - don't immediately give advice or suggestions unless they ask + - Search memory or web when it would be helpful for the current conversation + - Don't assume the user is actively planning a trip unless they explicitly say so + - Be conversational and natural - respond to what the user actually says + - When sharing memories, simply state what you remember rather than turning it into advice + - Only offer suggestions, recommendations, or tips if the user explicitly asks for them + - Store preferences and important details, but don't be overly eager about it + - If someone shares a preference, respond like a friend would - acknowledge it, maybe ask a follow-up question, but don't launch into advice + + Be helpful, friendly, and responsive. Mirror their conversational style - if they're just chatting, chat back. If they ask for help, then help. + """), +} + + +class CachingTavilySearchResults(TavilySearchResults): + """ + An interface to Tavily search that caches results in Redis. + + Caching the results of the web search allows us to avoid rate limiting, + improve latency, and reduce costs. + """ + + def __init__(self, redis_client: Redis, *args, **kwargs): + super().__init__(*args, **kwargs) + self.redis_client = redis_client + + def _run( + self, + query: str, + run_manager: CallbackManagerForToolRun | None = None, + ) -> tuple[list[dict[str, str]] | str, dict]: + """Use the tool.""" + cache_key = f"tavily_search:{query}" + cached_result: str | None = self.redis_client.get(cache_key) # type: ignore + if cached_result: + return json.loads(cached_result), {} + result, raw_results = super()._run(query, run_manager) + self.redis_client.set(cache_key, json.dumps(result), ex=60 * 60) + return result, raw_results + + +class TravelAgent: + """ + Travel Agent with comprehensive memory management capabilities. + + Uses the Agent Memory Server client with automatic discovery and integration + of all available memory tools. Supports web search when configured. + """ + + def __init__(self): + self._memory_client: MemoryAPIClient | None = None + self._redis_client: Redis | None = None + self._web_search_tool: CachingTavilySearchResults | None = None + + # Initialize LLMs and tools + self._setup_llms() + self._setup_tools() + + def _get_namespace(self, user_id: str) -> str: + """Generate consistent namespace for a user.""" + return f"travel_agent:{user_id}" + + async def get_client(self) -> MemoryAPIClient: + """Get the memory client, initializing it if needed.""" + if not self._memory_client: + self._memory_client = await create_memory_client( + base_url=MEMORY_SERVER_URL, + timeout=30.0, + default_model_name="gpt-4o", # Configure model for auto-summarization + ) + return self._memory_client + + def _setup_llms(self): + """Set up the LLM instances.""" + # Define the web search tool function + web_search_function = { + "name": "web_search", + "description": textwrap.dedent(""" + Search the web for current information about travel destinations, + requirements, weather, events, or any other travel-related + queries. 
Use this when you need up-to-date information that may + not be in your training data. + """), + "parameters": { + "type": "object", + "properties": { + "query": { + "type": "string", + "description": "The search query to find relevant travel information", + } + }, + "required": ["query"], + }, + } + + # Set up available functions list + available_functions = [] + + # Web search (optional) + if os.getenv("TAVILY_API_KEY"): + available_functions.append(web_search_function) + + # Memory tools (always available) - get all available tool schemas from client + memory_tool_schemas = MemoryAPIClient.get_all_memory_tool_schemas() + + # Extract function schemas from tool schemas + for tool_schema in memory_tool_schemas: + available_functions.append(tool_schema["function"]) + + logger.info( + f"Available memory tools: {[tool['function']['name'] for tool in memory_tool_schemas]}" + ) + + # Log all available tools for debugging + all_tool_names = [] + if os.getenv("TAVILY_API_KEY"): + all_tool_names.append("web_search") + all_tool_names.extend( + [tool["function"]["name"] for tool in memory_tool_schemas] + ) + logger.info(f"Total available tools: {all_tool_names}") + + # Set up LLM with function calling + if available_functions: + self.llm = ChatOpenAI(model="gpt-4o", temperature=0.7).bind_functions( + available_functions + ) + else: + self.llm = ChatOpenAI(model="gpt-4o", temperature=0.7) + + def _setup_tools(self): + """Set up tools including web search if Tavily API key is available.""" + # Set up Redis client for caching + try: + self._redis_client = Redis.from_url(REDIS_URL) + self._redis_client.ping() + logger.info("Redis client connected for caching") + except Exception as e: + logger.warning(f"Could not connect to Redis for caching: {e}") + self._redis_client = None + + # Set up web search tool if Tavily API key is available + if os.getenv("TAVILY_API_KEY") and self._redis_client: + try: + self._web_search_tool = CachingTavilySearchResults( + redis_client=self._redis_client, max_results=MAX_WEB_SEARCH_RESULTS + ) + logger.info("Web search tool with caching enabled") + except Exception as e: + logger.warning(f"Could not set up web search tool: {e}") + self._web_search_tool = None + else: + if not os.getenv("TAVILY_API_KEY"): + logger.info("TAVILY_API_KEY not set, web search disabled") + if not self._redis_client: + logger.info("Redis not available, web search caching disabled") + + async def cleanup(self): + """Clean up resources.""" + if self._memory_client: + await self._memory_client.close() + logger.info("Memory client closed") + if self._redis_client: + self._redis_client.close() + logger.info("Redis client closed") + + async def _get_working_memory(self, session_id: str, user_id: str) -> WorkingMemory: + """Get working memory for a session, creating it if it doesn't exist.""" + client = await self.get_client() + result = await client.get_working_memory( + session_id=session_id, + namespace=self._get_namespace(user_id), + window_size=15, + ) + return WorkingMemory(**result.model_dump()) + + async def _search_web(self, query: str) -> str: + """Perform a web search if the tool is available.""" + if not self._web_search_tool: + return "Web search is not available. Please ensure TAVILY_API_KEY is set and Redis is connected." 
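+
+        # CachingTavilySearchResults._run returns (results, raw_results); the
+        # results value may be either a plain string or a list of
+        # {"title", "content", "url"} dicts, so both shapes are handled below.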
+ + try: + results, _ = self._web_search_tool._run(query) + if isinstance(results, str): + return results + + # Format the results + formatted_results = [] + for result in results: + title = result.get("title", "No title") + content = result.get("content", "No content") + url = result.get("url", "No URL") + formatted_results.append(f"**{title}**\n{content}\nSource: {url}") + + return "\n\n".join(formatted_results) + except Exception as e: + logger.error(f"Error performing web search: {e}") + return f"Error performing web search: {str(e)}" + + async def _add_message_to_working_memory( + self, session_id: str, user_id: str, role: str, content: str + ) -> WorkingMemory: + """Add a message to working memory.""" + # Add new message + new_message = [{"role": role, "content": content}] + + # Get the memory client and save updated working memory + client = await self.get_client() + await client.append_messages_to_working_memory( + session_id=session_id, + messages=new_message, + namespace=self._get_namespace(user_id), + ) + + async def _handle_function_call( + self, + function_call: dict, + context_messages: list, + session_id: str, + user_id: str, + ) -> str: + """Handle function calls for both web search and memory tools.""" + function_name = function_call["name"] + + # Handle web search separately (not a memory function) + if function_name == "web_search": + return await self._handle_web_search_call(function_call, context_messages) + + # Handle all memory functions using the client's unified resolver + return await self._handle_memory_tool_call( + function_call, context_messages, session_id, user_id + ) + + async def _handle_web_search_call( + self, function_call: dict, context_messages: list + ) -> str: + """Handle web search function calls.""" + print("Searching the web...") + try: + function_args = json.loads(function_call["arguments"]) + query = function_args.get("query", "") + + # Perform the web search + search_results = await self._search_web(query) + + # Generate a follow-up response with the search results + follow_up_messages = context_messages + [ + { + "role": "assistant", + "content": f"I'll search for that information: {query}", + }, + { + "role": "function", + "name": "web_search", + "content": search_results, + }, + { + "role": "user", + "content": "Please provide a helpful response based on the search results.", + }, + ] + + final_response = self.llm.invoke(follow_up_messages) + return str(final_response.content) + + except (json.JSONDecodeError, TypeError): + logger.error(f"Invalid web search arguments: {function_call['arguments']}") + return "I'm sorry, I encountered an error processing your web search request. Please try again." 
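+
+    # For reference, the OpenAI-style function_call payload these handlers
+    # receive looks roughly like this (illustrative values only):
+    #   {"name": "web_search", "arguments": "{\"query\": \"weather in Tokyo\"}"}
+    # "arguments" usually arrives as a JSON string, which is why
+    # _handle_web_search_call parses it with json.loads(); the memory handler
+    # below instead passes the entire call to the client's resolve_tool_call().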
+ + async def _handle_memory_tool_call( + self, + function_call: dict, + context_messages: list, + session_id: str, + user_id: str, + ) -> str: + """Handle memory tool function calls using the client's unified resolver.""" + function_name = function_call["name"] + client = await self.get_client() + + print("Accessing memory...") + result = await client.resolve_tool_call( + tool_call=function_call, # Pass the entire function call object + session_id=session_id, + namespace=self._get_namespace(user_id), + ) + + if not result["success"]: + logger.error(f"Function call failed: {result['error']}") + return result["formatted_response"] + + # Generate a follow-up response with the function result + follow_up_messages = context_messages + [ + { + "role": "assistant", + "content": f"Let me {function_name.replace('_', ' ')}...", + }, + { + "role": "function", + "name": function_name, + "content": result["formatted_response"], + }, + { + "role": "user", + "content": "Please provide a helpful response based on this information.", + }, + ] + + final_response = self.llm.invoke(follow_up_messages) + return str(final_response.content) + + async def _generate_response( + self, session_id: str, user_id: str, user_input: str + ) -> str: + """Generate a response using the LLM with conversation context.""" + # Manage conversation history + working_memory = await self._get_working_memory(session_id, user_id) + context_messages = working_memory.messages + + # Always ensure system prompt is at the beginning + # Remove any existing system messages and add our current one + context_messages = [ + msg for msg in context_messages if msg.get("role") != "system" + ] + context_messages.insert(0, SYSTEM_PROMPT) + + # Note: user input has already been added to working memory, + # so we don't need to add it again here + + try: + logger.info(f"Context messages: {context_messages}") + response = self.llm.invoke(context_messages) + + # Handle function calls using unified approach + if ( + hasattr(response, "additional_kwargs") + and "function_call" in response.additional_kwargs + ): + return await self._handle_function_call( + response.additional_kwargs["function_call"], + context_messages, + session_id, + user_id, + ) + + return str(response.content) + except Exception as e: + logger.error(f"Error generating response: {e}") + return "I'm sorry, I encountered an error processing your request." + + async def process_user_input( + self, user_input: str, session_id: str, user_id: str + ) -> str: + """Process user input and return assistant response.""" + try: + # Add user message to working memory first + await self._add_message_to_working_memory( + session_id, user_id, "user", user_input + ) + + response = await self._generate_response(session_id, user_id, user_input) + await self._add_message_to_working_memory( + session_id, user_id, "assistant", response + ) + return response + + except Exception as e: + logger.exception(f"Error processing user input: {e}") + return "I'm sorry, I encountered an error processing your request." + + async def run_async( + self, session_id: str = "travel_session", user_id: str = DEFAULT_USER + ): + """Main async interaction loop for the travel agent.""" + print("Welcome to the Travel Assistant! (Type 'exit' to quit)") + print(f"Session ID: {session_id}, User ID: {user_id}") + print() + + try: + while True: + user_input = input("\nYou (type 'quit' to quit): ") + + if not user_input.strip(): + continue + + if user_input.lower() in ["exit", "quit"]: + print("Thank you for using the Travel Assistant. 
Goodbye!") + break + + # Process input and get response + response = await self.process_user_input( + user_input, session_id, user_id + ) + print(f"\nAssistant: {response}") + + except KeyboardInterrupt: + print("\nGoodbye!") + finally: + await self.cleanup() + + def run(self, session_id: str = "travel_session", user_id: str = DEFAULT_USER): + """Synchronous wrapper for the async run method.""" + asyncio.run(self.run_async(session_id, user_id)) + + +def main(): + """Main entry point""" + import argparse + + parser = argparse.ArgumentParser(description="Travel Agent with Memory Server") + parser.add_argument("--user-id", default=DEFAULT_USER, help="User ID") + parser.add_argument( + "--session-id", default="demo_travel_session", help="Session ID" + ) + parser.add_argument( + "--memory-server-url", default="http://localhost:8000", help="Memory server URL" + ) + parser.add_argument( + "--redis-url", default="redis://localhost:6379", help="Redis URL for caching" + ) + + args = parser.parse_args() + + # Check for required API keys + if not os.getenv("OPENAI_API_KEY"): + print("Error: OPENAI_API_KEY environment variable is required") + return + + # Check for optional Tavily API key + if not os.getenv("TAVILY_API_KEY"): + print( + "Note: TAVILY_API_KEY not set - web search functionality will be disabled" + ) + print("To enable web search, set TAVILY_API_KEY environment variable") + + # Check for Redis connection + redis_url = args.redis_url if hasattr(args, "redis_url") else REDIS_URL + print(f"Using Redis at: {redis_url} for caching (if available)") + + # Set memory server URL from argument if provided + if args.memory_server_url: + os.environ["MEMORY_SERVER_URL"] = args.memory_server_url + + # Set Redis URL from argument if provided + if args.redis_url: + os.environ["REDIS_URL"] = args.redis_url + + try: + agent = TravelAgent() + agent.run(session_id=args.session_id, user_id=args.user_id) + except KeyboardInterrupt: + print("\nGoodbye!") + except Exception as e: + logger.error(f"Error running travel agent: {e}") + raise + + +if __name__ == "__main__": + main() diff --git a/pyproject.toml b/pyproject.toml index b4855cd..cfd480e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,6 @@ license = { text = "MIT" } authors = [{ name = "Andrew Brookins", email = "andrew.brookins@redis.com" }] dependencies = [ "accelerate>=1.6.0", - "agent-memory-client", "anthropic>=0.15.0", "bertopic<0.17.0,>=0.16.4", "fastapi>=0.115.11", @@ -128,11 +127,12 @@ dev = [ "pytest>=8.3.5", "pytest-asyncio>=0.23.0", "pytest-xdist>=3.5.0", + "pytest-cov>=4.0.0", "ruff>=0.3.0", "testcontainers>=3.7.0", "pre-commit>=3.6.0", "freezegun>=1.2.0", - "-e ./agent-memory-client", + "mypy>=1.16.1", ] [tool.ruff.lint.per-file-ignores] diff --git a/tests/conftest.py b/tests/conftest.py index fc44671..76fc23b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -222,8 +222,10 @@ def redis_container(request): os.environ["COMPOSE_PROJECT_NAME"] = f"redis_test_{worker_id}" os.environ.setdefault("REDIS_IMAGE", "redis/redis-stack-server:latest") + current_dir = os.path.dirname(os.path.abspath(__file__)) + compose = DockerCompose( - context="tests", + context=current_dir, compose_file_name="docker-compose.yml", pull=True, ) @@ -241,7 +243,35 @@ def redis_url(redis_container): on container port 6379 (mapped to an ephemeral port on the host). 
""" host, port = redis_container.get_service_host_and_port("redis", 6379) - return f"redis://{host}:{port}" + + # On macOS, testcontainers sometimes returns 0.0.0.0 which doesn't work + # Replace with localhost if we get 0.0.0.0 + if host == "0.0.0.0": + host = "localhost" + + redis_url = f"redis://{host}:{port}" + + # Verify the connection works before returning with retries + import time + + import redis + + max_retries = 10 + retry_delay = 1 + + for attempt in range(max_retries): + try: + client = redis.Redis.from_url(redis_url) + client.ping() + break # Connection successful + except Exception as e: + if attempt == max_retries - 1: + raise ConnectionError( + f"Failed to connect to Redis at {redis_url} after {max_retries} attempts: {e}" + ) from e + time.sleep(retry_delay) + + return redis_url @pytest.fixture() diff --git a/tests/docker-compose.yml b/tests/docker-compose.yml index d2ef295..a0368a1 100644 --- a/tests/docker-compose.yml +++ b/tests/docker-compose.yml @@ -1,14 +1,12 @@ -version: "3.9" services: redis: - image: "${REDIS_IMAGE}" + image: "${REDIS_IMAGE:-redis/redis-stack-server:latest}" ports: - "6379" environment: - "REDIS_ARGS=--save '' --appendonly no" - deploy: - replicas: 1 - restart_policy: - condition: on-failure - labels: - - "com.docker.compose.publishers=redis,6379,6379" + healthcheck: + test: ["CMD", "redis-cli", "ping"] + interval: 5s + timeout: 3s + retries: 5 diff --git a/tests/test_api.py b/tests/test_api.py index 8c222f3..ed8729c 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -365,7 +365,7 @@ class TestMemoryPromptEndpoint: async def test_memory_prompt_with_session_id(self, mock_get_working_memory, client): """Test the memory_prompt endpoint with only session_id provided""" # Mock the session memory - mock_session_memory = WorkingMemoryResponse( + mock_working_memory = WorkingMemoryResponse( messages=[ MemoryMessage(role="user", content="Hello"), MemoryMessage(role="assistant", content="Hi there"), @@ -375,7 +375,7 @@ async def test_memory_prompt_with_session_id(self, mock_get_working_memory, clie context="Previous conversation context", tokens=150, ) - mock_get_working_memory.return_value = mock_session_memory + mock_get_working_memory.return_value = mock_working_memory # Call the endpoint query = "What's the weather like?" 
@@ -464,7 +464,7 @@ async def test_memory_prompt_with_both_sources( ): """Test the memory_prompt endpoint with both session_id and long_term_search_payload""" # Mock session memory - mock_session_memory = WorkingMemoryResponse( + mock_working_memory = WorkingMemoryResponse( messages=[ MemoryMessage(role="user", content="How do you make pasta?"), MemoryMessage( @@ -477,7 +477,7 @@ async def test_memory_prompt_with_both_sources( context="Cooking conversation", tokens=200, ) - mock_get_working_memory.return_value = mock_session_memory + mock_get_working_memory.return_value = mock_working_memory # Mock the long-term memory search mock_search.return_value = MemoryRecordResultsResponse( @@ -592,7 +592,7 @@ async def test_memory_prompt_with_model_name( mock_get_model_config.return_value = model_config # Mock the session memory - mock_session_memory = WorkingMemoryResponse( + mock_working_memory = WorkingMemoryResponse( messages=[ MemoryMessage(role="user", content="Hello"), MemoryMessage(role="assistant", content="Hi there"), @@ -602,7 +602,7 @@ async def test_memory_prompt_with_model_name( context="Previous context", tokens=150, ) - mock_get_working_memory.return_value = mock_session_memory + mock_get_working_memory.return_value = mock_working_memory # Call the endpoint with model_name query = "What's the weather like?" diff --git a/tests/test_client_api.py b/tests/test_client_api.py index 01dc9e8..6af1f5a 100644 --- a/tests/test_client_api.py +++ b/tests/test_client_api.py @@ -9,7 +9,7 @@ import pytest from agent_memory_client import MemoryAPIClient, MemoryClientConfig -from agent_memory_client.filters import Namespace, SessionId, Topics +from agent_memory_client.filters import Namespace, SessionId, Topics, UserId from fastapi import FastAPI from httpx import ASGITransport, AsyncClient from mcp.server.fastmcp.prompts import base @@ -115,9 +115,9 @@ async def test_session_lifecycle(memory_test_client: MemoryAPIClient): mock_set_memory.return_value = None # Step 1: Create new session memory - response = await memory_test_client.put_session_memory(session_id, memory) - assert response.messages[0].content == "Hello from the client!" - assert response.messages[1].content == "Hi there, I'm the memory server!" + response = await memory_test_client.put_working_memory(session_id, memory) + assert response.messages[0]["content"] == "Hello from the client!" + assert response.messages[1]["content"] == "Hi there, I'm the memory server!" assert response.context == "This is a test session created by the API client." # Next, mock GET response for retrieving session memory @@ -130,10 +130,10 @@ async def test_session_lifecycle(memory_test_client: MemoryAPIClient): mock_get_memory.return_value = mock_response # Step 2: Retrieve the session memory - session = await memory_test_client.get_session_memory(session_id) + session = await memory_test_client.get_working_memory(session_id) assert len(session.messages) == 2 - assert session.messages[0].content == "Hello from the client!" - assert session.messages[1].content == "Hi there, I'm the memory server!" + assert session.messages[0]["content"] == "Hello from the client!" + assert session.messages[1]["content"] == "Hi there, I'm the memory server!" assert session.context == "This is a test session created by the API client." 
# Mock list sessions @@ -153,7 +153,7 @@ async def test_session_lifecycle(memory_test_client: MemoryAPIClient): mock_delete.return_value = None # Step 4: Delete the session - response = await memory_test_client.delete_session_memory(session_id) + response = await memory_test_client.delete_working_memory(session_id) assert response.status == "ok" # Verify it's gone by mocking a 404 response @@ -163,7 +163,7 @@ async def test_session_lifecycle(memory_test_client: MemoryAPIClient): mock_get_memory.return_value = None # This should not raise an error anymore since the unified API returns empty working memory instead of 404 - session = await memory_test_client.get_session_memory(session_id) + session = await memory_test_client.get_working_memory(session_id) assert len(session.messages) == 0 # Should return empty working memory @@ -226,8 +226,8 @@ async def test_long_term_memory(memory_test_client: MemoryAPIClient): with patch("agent_memory_server.api.settings.long_term_memory", True): results = await memory_test_client.search_long_term_memory( text="What color does the user prefer?", - user_id={"eq": "test-user"}, - topics={"any": ["colors", "preferences"]}, + user_id=UserId(eq="test-user"), + topics=Topics(any=["colors", "preferences"]), ) assert results.total == 2 diff --git a/tests/test_client_enhancements.py b/tests/test_client_enhancements.py index 2d871ce..e5154e0 100644 --- a/tests/test_client_enhancements.py +++ b/tests/test_client_enhancements.py @@ -1,10 +1,3 @@ -""" -Test file for the enhanced Memory API Client functionality. - -Tests for new features like lifecycle management, batch operations, -pagination utilities, validation, and enhanced convenience methods. -""" - import asyncio from collections.abc import AsyncGenerator from unittest.mock import patch @@ -86,7 +79,7 @@ async def test_promote_working_memories_to_long_term(self, enhanced_test_client) ) with ( - patch.object(enhanced_test_client, "get_session_memory") as mock_get, + patch.object(enhanced_test_client, "get_working_memory") as mock_get, patch.object( enhanced_test_client, "create_long_term_memory" ) as mock_create, @@ -131,7 +124,7 @@ async def test_promote_specific_memory_ids(self, enhanced_test_client): ) with ( - patch.object(enhanced_test_client, "get_session_memory") as mock_get, + patch.object(enhanced_test_client, "get_working_memory") as mock_get, patch.object( enhanced_test_client, "create_long_term_memory" ) as mock_create, @@ -165,7 +158,7 @@ async def test_promote_no_memories(self, enhanced_test_client): user_id=None, ) - with patch.object(enhanced_test_client, "get_session_memory") as mock_get: + with patch.object(enhanced_test_client, "get_working_memory") as mock_get: mock_get.return_value = working_memory_response result = await enhanced_test_client.promote_working_memories_to_long_term( @@ -449,8 +442,8 @@ async def test_update_working_memory_data_merge(self, enhanced_test_client): ) with ( - patch.object(enhanced_test_client, "get_session_memory") as mock_get, - patch.object(enhanced_test_client, "put_session_memory") as mock_put, + patch.object(enhanced_test_client, "get_working_memory") as mock_get, + patch.object(enhanced_test_client, "put_working_memory") as mock_put, ): mock_get.return_value = existing_memory mock_put.return_value = existing_memory @@ -488,8 +481,8 @@ async def test_update_working_memory_data_replace(self, enhanced_test_client): ) with ( - patch.object(enhanced_test_client, "get_session_memory") as mock_get, - patch.object(enhanced_test_client, "put_session_memory") as 
mock_put, + patch.object(enhanced_test_client, "get_working_memory") as mock_get, + patch.object(enhanced_test_client, "put_working_memory") as mock_put, ): mock_get.return_value = existing_memory mock_put.return_value = existing_memory @@ -524,8 +517,8 @@ async def test_update_working_memory_data_deep_merge(self, enhanced_test_client) ) with ( - patch.object(enhanced_test_client, "get_session_memory") as mock_get, - patch.object(enhanced_test_client, "put_session_memory") as mock_put, + patch.object(enhanced_test_client, "get_working_memory") as mock_get, + patch.object(enhanced_test_client, "put_working_memory") as mock_put, ): mock_get.return_value = existing_memory mock_put.return_value = existing_memory @@ -574,8 +567,8 @@ async def test_append_messages_to_working_memory(self, enhanced_test_client): ] with ( - patch.object(enhanced_test_client, "get_session_memory") as mock_get, - patch.object(enhanced_test_client, "put_session_memory") as mock_put, + patch.object(enhanced_test_client, "get_working_memory") as mock_get, + patch.object(enhanced_test_client, "put_working_memory") as mock_put, ): mock_get.return_value = existing_memory mock_put.return_value = existing_memory @@ -588,9 +581,9 @@ async def test_append_messages_to_working_memory(self, enhanced_test_client): # Check that messages were appended working_memory_arg = mock_put.call_args[0][1] assert len(working_memory_arg.messages) == 3 - assert working_memory_arg.messages[0].content == "First message" - assert working_memory_arg.messages[1].content == "Second message" - assert working_memory_arg.messages[2].content == "Third message" + assert working_memory_arg.messages[0]["content"] == "First message" + assert working_memory_arg.messages[1]["content"] == "Second message" + assert working_memory_arg.messages[2]["content"] == "Third message" def test_deep_merge_dicts(self, enhanced_test_client): """Test the deep merge dictionary utility method.""" diff --git a/tests/test_client_tool_calls.py b/tests/test_client_tool_calls.py new file mode 100644 index 0000000..3d73b72 --- /dev/null +++ b/tests/test_client_tool_calls.py @@ -0,0 +1,587 @@ +""" +Test file for the Memory API Client tool call functionality. + +Tests for multi-provider tool call parsing, resolution, and schema generation. 
+""" + +import json +from collections.abc import AsyncGenerator +from unittest.mock import patch + +import pytest +from agent_memory_client import MemoryAPIClient, MemoryClientConfig +from fastapi import FastAPI +from httpx import ASGITransport, AsyncClient + +from agent_memory_server.api import router as memory_router +from agent_memory_server.healthcheck import router as health_router + + +@pytest.fixture +def memory_app() -> FastAPI: + """Create a test FastAPI app with memory routers for testing the client.""" + app = FastAPI() + app.include_router(health_router) + app.include_router(memory_router) + return app + + +@pytest.fixture +async def tool_call_test_client( + memory_app: FastAPI, +) -> AsyncGenerator[MemoryAPIClient, None]: + """Create a memory client that uses the test FastAPI app.""" + async with AsyncClient( + transport=ASGITransport(app=memory_app), + base_url="http://test", + ) as http_client: + config = MemoryClientConfig( + base_url="http://test", default_namespace="test-namespace" + ) + client = MemoryAPIClient(config) + client._client = http_client + yield client + + +class TestToolCallParsing: + """Tests for tool call parsing across different provider formats.""" + + def test_parse_openai_function_call(self): + """Test parsing OpenAI legacy function call format.""" + function_call = { + "name": "search_memory", + "arguments": json.dumps({"query": "user preferences", "max_results": 5}), + } + + result = MemoryAPIClient.parse_openai_function_call(function_call) + + assert result["id"] is None + assert result["name"] == "search_memory" + assert result["arguments"]["query"] == "user preferences" + assert result["arguments"]["max_results"] == 5 + assert result["provider"] == "openai" + + def test_parse_openai_function_call_invalid_json(self): + """Test parsing OpenAI function call with invalid JSON arguments.""" + function_call = {"name": "search_memory", "arguments": "invalid json"} + + result = MemoryAPIClient.parse_openai_function_call(function_call) + + assert result["name"] == "search_memory" + assert result["arguments"] == {} + assert result["provider"] == "openai" + + def test_parse_openai_tool_call(self): + """Test parsing OpenAI current tool call format.""" + tool_call = { + "id": "call_123", + "type": "function", + "function": { + "name": "add_memory_to_working_memory", + "arguments": json.dumps( + {"text": "User likes pizza", "memory_type": "semantic"} + ), + }, + } + + result = MemoryAPIClient.parse_openai_tool_call(tool_call) + + assert result["id"] == "call_123" + assert result["name"] == "add_memory_to_working_memory" + assert result["arguments"]["text"] == "User likes pizza" + assert result["arguments"]["memory_type"] == "semantic" + assert result["provider"] == "openai" + + def test_parse_openai_tool_call_dict_arguments(self): + """Test parsing OpenAI tool call with dict arguments (not JSON string).""" + tool_call = { + "id": "call_456", + "type": "function", + "function": { + "name": "get_working_memory", + "arguments": {"session_id": "test"}, + }, + } + + result = MemoryAPIClient.parse_openai_tool_call(tool_call) + + assert result["id"] == "call_456" + assert result["name"] == "get_working_memory" + assert result["arguments"]["session_id"] == "test" + assert result["provider"] == "openai" + + def test_parse_anthropic_tool_use(self): + """Test parsing Anthropic tool use format.""" + tool_use = { + "type": "tool_use", + "id": "tool_789", + "name": "update_working_memory_data", + "input": { + "data": {"preferences": {"theme": "dark"}}, + "merge_strategy": 
"merge", + }, + } + + result = MemoryAPIClient.parse_anthropic_tool_use(tool_use) + + assert result["id"] == "tool_789" + assert result["name"] == "update_working_memory_data" + assert result["arguments"]["data"]["preferences"]["theme"] == "dark" + assert result["arguments"]["merge_strategy"] == "merge" + assert result["provider"] == "anthropic" + + def test_parse_tool_call_auto_detect_formats(self): + """Test automatic detection of different formats.""" + # Anthropic format + anthropic_call = { + "type": "tool_use", + "id": "tool_auto", + "name": "search_memory", + "input": {"query": "test"}, + } + result = MemoryAPIClient.parse_tool_call(anthropic_call) + assert result["provider"] == "anthropic" + + # OpenAI current format + openai_current = { + "id": "call_auto", + "type": "function", + "function": { + "name": "search_memory", + "arguments": json.dumps({"query": "test"}), + }, + } + result = MemoryAPIClient.parse_tool_call(openai_current) + assert result["provider"] == "openai" + + # OpenAI legacy format + openai_legacy = { + "name": "search_memory", + "arguments": json.dumps({"query": "test"}), + } + result = MemoryAPIClient.parse_tool_call(openai_legacy) + assert result["provider"] == "openai" + + +class TestToolCallResolution: + """Tests for tool call resolution functionality.""" + + @pytest.mark.asyncio + async def test_resolve_function_call_search_memory(self, tool_call_test_client): + """Test resolving search_memory function call.""" + mock_result = { + "memories": [{"text": "test memory", "memory_type": "semantic"}], + "total_found": 1, + "query": "test", + "summary": "Found 1 relevant memories for: test", + } + + with patch.object( + tool_call_test_client, "search_memory_tool", return_value=mock_result + ): + result = await tool_call_test_client.resolve_function_call( + function_name="search_memory", + function_arguments={"query": "test", "max_results": 3}, + session_id="test_session", + ) + + assert result["success"] is True + assert result["function_name"] == "search_memory" + assert result["result"] == mock_result + assert result["error"] is None + assert "Found 1 relevant memories" in result["formatted_response"] + + @pytest.mark.asyncio + async def test_resolve_function_call_get_working_memory( + self, tool_call_test_client + ): + """Test resolving get_working_memory function call.""" + mock_result = { + "session_id": "test_session", + "message_count": 5, + "memory_count": 2, + "summary": "Session has 5 messages, 2 stored memories, and 0 data entries", + } + + with patch.object( + tool_call_test_client, "get_working_memory_tool", return_value=mock_result + ): + result = await tool_call_test_client.resolve_function_call( + function_name="get_working_memory", + function_arguments={}, + session_id="test_session", + ) + + assert result["success"] is True + assert result["function_name"] == "get_working_memory" + assert result["result"] == mock_result + assert "Session has 5 messages" in result["formatted_response"] + + @pytest.mark.asyncio + async def test_resolve_function_call_add_memory(self, tool_call_test_client): + """Test resolving add_memory_to_working_memory function call.""" + mock_result = { + "success": True, + "memory_type": "semantic", + "text_preview": "User prefers dark mode...", + "summary": "Successfully stored semantic memory: User prefers dark mode...", + } + + with patch.object( + tool_call_test_client, "add_memory_tool", return_value=mock_result + ): + result = await tool_call_test_client.resolve_function_call( + 
function_name="add_memory_to_working_memory", + function_arguments={ + "text": "User prefers dark mode", + "memory_type": "semantic", + "topics": ["preferences"], + }, + session_id="test_session", + ) + + assert result["success"] is True + assert result["function_name"] == "add_memory_to_working_memory" + assert result["result"] == mock_result + assert "Successfully stored semantic memory" in result["formatted_response"] + + @pytest.mark.asyncio + async def test_resolve_function_call_update_data(self, tool_call_test_client): + """Test resolving update_working_memory_data function call.""" + mock_result = { + "success": True, + "updated_keys": ["user_settings"], + "merge_strategy": "merge", + "summary": "Successfully updated 1 data entries using merge strategy", + } + + with patch.object( + tool_call_test_client, "update_memory_data_tool", return_value=mock_result + ): + result = await tool_call_test_client.resolve_function_call( + function_name="update_working_memory_data", + function_arguments={ + "data": {"user_settings": {"theme": "dark"}}, + "merge_strategy": "merge", + }, + session_id="test_session", + ) + + assert result["success"] is True + assert result["function_name"] == "update_working_memory_data" + assert result["result"] == mock_result + assert "Successfully updated 1 data entries" in result["formatted_response"] + + @pytest.mark.asyncio + async def test_resolve_function_call_unknown_function(self, tool_call_test_client): + """Test resolving unknown function call.""" + result = await tool_call_test_client.resolve_function_call( + function_name="unknown_function", + function_arguments={}, + session_id="test_session", + ) + + assert result["success"] is False + assert result["function_name"] == "unknown_function" + assert result["result"] is None + assert "Unknown function: unknown_function" in result["error"] + assert "don't know how to handle" in result["formatted_response"] + + @pytest.mark.asyncio + async def test_resolve_function_call_invalid_json_arguments( + self, tool_call_test_client + ): + """Test resolving function call with invalid JSON arguments.""" + result = await tool_call_test_client.resolve_function_call( + function_name="search_memory", + function_arguments="invalid json", + session_id="test_session", + ) + + assert result["success"] is False + assert result["function_name"] == "search_memory" + assert result["result"] is None + assert "JSON decode error" in result["error"] + assert "error parsing the function arguments" in result["formatted_response"] + + @pytest.mark.asyncio + async def test_resolve_function_call_missing_required_args( + self, tool_call_test_client + ): + """Test resolving function call with missing required arguments.""" + result = await tool_call_test_client.resolve_function_call( + function_name="search_memory", + function_arguments={}, # Missing required 'query' parameter + session_id="test_session", + ) + + assert result["success"] is False + assert result["function_name"] == "search_memory" + assert result["result"] is None + assert "Query parameter is required" in result["error"] + + @pytest.mark.asyncio + async def test_resolve_tool_call_openai_legacy(self, tool_call_test_client): + """Test resolving OpenAI legacy format tool call.""" + tool_call = { + "name": "search_memory", + "arguments": json.dumps({"query": "test"}), + } + + mock_result = { + "memories": [], + "total_found": 0, + "query": "test", + "summary": "Found 0 relevant memories for: test", + } + + with patch.object( + tool_call_test_client, "search_memory_tool", 
return_value=mock_result + ): + result = await tool_call_test_client.resolve_tool_call( + tool_call=tool_call, session_id="test_session" + ) + + assert result["success"] is True + assert result["function_name"] == "search_memory" + assert "Found 0 relevant memories" in result["formatted_response"] + + @pytest.mark.asyncio + async def test_resolve_tool_call_anthropic(self, tool_call_test_client): + """Test resolving Anthropic format tool call.""" + tool_call = { + "type": "tool_use", + "id": "tool_123", + "name": "get_working_memory", + "input": {}, + } + + mock_result = { + "session_id": "test_session", + "summary": "Session has 0 messages, 0 stored memories, and 0 data entries", + } + + with patch.object( + tool_call_test_client, "get_working_memory_tool", return_value=mock_result + ): + result = await tool_call_test_client.resolve_tool_call( + tool_call=tool_call, session_id="test_session" + ) + + assert result["success"] is True + assert result["function_name"] == "get_working_memory" + assert "Session has 0 messages" in result["formatted_response"] + + @pytest.mark.asyncio + async def test_resolve_tool_calls_batch(self, tool_call_test_client): + """Test resolving multiple tool calls in batch.""" + tool_calls = [ + {"name": "search_memory", "arguments": json.dumps({"query": "test1"})}, + { + "type": "tool_use", + "id": "tool_2", + "name": "get_working_memory", + "input": {}, + }, + ] + + mock_search_result = {"summary": "Search result"} + mock_memory_result = {"summary": "Memory state"} + + with ( + patch.object( + tool_call_test_client, + "search_memory_tool", + return_value=mock_search_result, + ), + patch.object( + tool_call_test_client, + "get_working_memory_tool", + return_value=mock_memory_result, + ), + ): + results = await tool_call_test_client.resolve_tool_calls( + tool_calls=tool_calls, session_id="test_session" + ) + + assert len(results) == 2 + assert results[0]["success"] is True + assert results[0]["function_name"] == "search_memory" + assert results[1]["success"] is True + assert results[1]["function_name"] == "get_working_memory" + + +class TestToolSchemaGeneration: + """Tests for tool schema generation in different formats.""" + + def test_get_memory_search_tool_schema(self): + """Test getting memory search tool schema in OpenAI format.""" + schema = MemoryAPIClient.get_memory_search_tool_schema() + + assert schema["type"] == "function" + assert schema["function"]["name"] == "search_memory" + assert "description" in schema["function"] + assert "parameters" in schema["function"] + assert schema["function"]["parameters"]["type"] == "object" + assert "query" in schema["function"]["parameters"]["properties"] + assert "query" in schema["function"]["parameters"]["required"] + + def test_get_memory_search_tool_schema_anthropic(self): + """Test getting memory search tool schema in Anthropic format.""" + schema = MemoryAPIClient.get_memory_search_tool_schema_anthropic() + + assert schema["name"] == "search_memory" + assert "description" in schema + assert "input_schema" in schema + assert schema["input_schema"]["type"] == "object" + assert "query" in schema["input_schema"]["properties"] + assert "query" in schema["input_schema"]["required"] + + def test_get_all_memory_tool_schemas(self): + """Test getting all memory tool schemas in OpenAI format.""" + schemas = MemoryAPIClient.get_all_memory_tool_schemas() + + assert len(schemas) == 4 + assert all(schema["type"] == "function" for schema in schemas) + + function_names = [schema["function"]["name"] for schema in schemas] + 
expected_names = [ + "search_memory", + "get_working_memory", + "add_memory_to_working_memory", + "update_working_memory_data", + ] + assert set(function_names) == set(expected_names) + + def test_get_all_memory_tool_schemas_anthropic(self): + """Test getting all memory tool schemas in Anthropic format.""" + schemas = MemoryAPIClient.get_all_memory_tool_schemas_anthropic() + + assert len(schemas) == 4 + assert all("name" in schema and "input_schema" in schema for schema in schemas) + + function_names = [schema["name"] for schema in schemas] + expected_names = [ + "search_memory", + "get_working_memory", + "add_memory_to_working_memory", + "update_working_memory_data", + ] + assert set(function_names) == set(expected_names) + + def test_convert_openai_to_anthropic_schema(self): + """Test converting OpenAI schema to Anthropic format.""" + openai_schema = { + "type": "function", + "function": { + "name": "test_function", + "description": "Test function description", + "parameters": { + "type": "object", + "properties": { + "param1": {"type": "string"}, + "param2": {"type": "integer"}, + }, + "required": ["param1"], + }, + }, + } + + anthropic_schema = MemoryAPIClient._convert_openai_to_anthropic_schema( + openai_schema + ) + + assert anthropic_schema["name"] == "test_function" + assert anthropic_schema["description"] == "Test function description" + assert anthropic_schema["input_schema"]["type"] == "object" + assert ( + anthropic_schema["input_schema"]["properties"]["param1"]["type"] == "string" + ) + assert anthropic_schema["input_schema"]["required"] == ["param1"] + + +class TestToolCallErrorHandling: + """Tests for tool call error handling and edge cases.""" + + @pytest.mark.asyncio + async def test_resolve_tool_call_parse_error(self, tool_call_test_client): + """Test resolving tool call that causes parse error.""" + # Malformed tool call that should cause parsing issues + tool_call = {"malformed": "data"} + + result = await tool_call_test_client.resolve_tool_call( + tool_call=tool_call, session_id="test_session" + ) + + # Should still work with generic parsing but with empty name + assert result["success"] is False + # The function name will be empty string, not "unknown" + assert result["function_name"] == "" + + @pytest.mark.asyncio + async def test_resolve_function_call_exception_handling( + self, tool_call_test_client + ): + """Test that exceptions in tool methods are properly handled.""" + with patch.object( + tool_call_test_client, + "search_memory_tool", + side_effect=Exception("Tool error"), + ): + result = await tool_call_test_client.resolve_function_call( + function_name="search_memory", + function_arguments={"query": "test"}, + session_id="test_session", + ) + + assert result["success"] is False + assert result["function_name"] == "search_memory" + assert result["result"] is None + assert "Tool error" in result["error"] + assert "error while executing search_memory" in result["formatted_response"] + + def test_parse_tool_call_edge_cases(self): + """Test tool call parsing with edge cases.""" + # Empty tool call + empty_call = {} + result = MemoryAPIClient.parse_tool_call(empty_call) + assert result["name"] == "" + assert result["arguments"] == {} + assert result["provider"] == "generic" + + # Tool call with None values + none_call = {"name": None, "arguments": None} + result = MemoryAPIClient.parse_tool_call(none_call) + # The actual parsing returns None for name and arguments, not empty values + assert result["name"] is None + assert result["arguments"] is None + + 
@pytest.mark.asyncio + async def test_resolve_function_calls_legacy_method(self, tool_call_test_client): + """Test the legacy resolve_function_calls method still works.""" + function_calls = [ + {"name": "search_memory", "arguments": {"query": "test1"}}, + {"name": "get_working_memory", "arguments": {}}, + ] + + mock_search_result = {"summary": "Search result"} + mock_memory_result = {"summary": "Memory state"} + + with ( + patch.object( + tool_call_test_client, + "search_memory_tool", + return_value=mock_search_result, + ), + patch.object( + tool_call_test_client, + "get_working_memory_tool", + return_value=mock_memory_result, + ), + ): + results = await tool_call_test_client.resolve_function_calls( + function_calls=function_calls, session_id="test_session" + ) + + assert len(results) == 2 + assert all(result["success"] for result in results) diff --git a/tests/test_mcp.py b/tests/test_mcp.py index 63d44cd..7be3092 100644 --- a/tests/test_mcp.py +++ b/tests/test_mcp.py @@ -297,7 +297,7 @@ async def test_set_working_memory_tool(self, mcp_test_setup): async with client_session(mcp_app._mcp_server) as client: with patch( - "agent_memory_server.mcp.core_put_session_memory" + "agent_memory_server.mcp.core_put_working_memory" ) as mock_put_memory: mock_put_memory.return_value = mock_response @@ -360,7 +360,7 @@ async def test_set_working_memory_with_json_data(self, mcp_test_setup): async with client_session(mcp_app._mcp_server) as client: with patch( - "agent_memory_server.mcp.core_put_session_memory" + "agent_memory_server.mcp.core_put_working_memory" ) as mock_put_memory: mock_put_memory.return_value = mock_response @@ -408,7 +408,7 @@ async def test_set_working_memory_auto_id_generation(self, mcp_test_setup): async with client_session(mcp_app._mcp_server) as client: with patch( - "agent_memory_server.mcp.core_put_session_memory" + "agent_memory_server.mcp.core_put_working_memory" ) as mock_put_memory: mock_put_memory.return_value = mock_response @@ -430,7 +430,7 @@ async def test_set_working_memory_auto_id_generation(self, mcp_test_setup): # Verify ID was auto-generated call_args = mock_put_memory.call_args - # core_put_session_memory is called with keyword args: session_id, memory, background_tasks + # core_put_working_memory is called with keyword args: session_id, memory, background_tasks if call_args and call_args.kwargs.get("memory"): working_memory = call_args.kwargs["memory"] memory = working_memory.memories[0] diff --git a/uv.lock b/uv.lock index 693b735..0ba3608 100644 --- a/uv.lock +++ b/uv.lock @@ -19,33 +19,11 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/63/b1/8198e3cdd11a426b1df2912e3381018c4a4a55368f6d0857ba3ca418ef93/accelerate-1.6.0-py3-none-any.whl", hash = "sha256:1aee717d3d3735ad6d09710a7c26990ee4652b79b4e93df46551551b5227c2aa", size = 354748 }, ] -[[package]] -name = "agent-memory-client" -source = { directory = "agent-memory-client" } -dependencies = [ - { name = "httpx" }, - { name = "pydantic" }, - { name = "ulid-py" }, -] - -[package.metadata] -requires-dist = [ - { name = "httpx", specifier = ">=0.25.0" }, - { name = "mypy", marker = "extra == 'dev'", specifier = ">=1.5.0" }, - { name = "pydantic", specifier = ">=2.0.0" }, - { name = "pytest", marker = "extra == 'dev'", specifier = ">=7.0.0" }, - { name = "pytest-asyncio", marker = "extra == 'dev'", specifier = ">=0.21.0" }, - { name = "pytest-httpx", marker = "extra == 'dev'", specifier = ">=0.21.0" }, - { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.1.0" }, - { name = "ulid-py", 
specifier = ">=1.1.0" }, -] - [[package]] name = "agent-memory-server" source = { editable = "." } dependencies = [ { name = "accelerate" }, - { name = "agent-memory-client" }, { name = "anthropic" }, { name = "bertopic" }, { name = "click" }, @@ -75,9 +53,11 @@ dependencies = [ [package.dev-dependencies] dev = [ { name = "freezegun" }, + { name = "mypy" }, { name = "pre-commit" }, { name = "pytest" }, { name = "pytest-asyncio" }, + { name = "pytest-cov" }, { name = "pytest-xdist" }, { name = "ruff" }, { name = "testcontainers" }, @@ -86,7 +66,6 @@ dev = [ [package.metadata] requires-dist = [ { name = "accelerate", specifier = ">=1.6.0" }, - { name = "agent-memory-client", directory = "agent-memory-client" }, { name = "anthropic", specifier = ">=0.15.0" }, { name = "bertopic", specifier = ">=0.16.4,<0.17.0" }, { name = "click", specifier = ">=8.1.0" }, @@ -116,9 +95,11 @@ requires-dist = [ [package.metadata.requires-dev] dev = [ { name = "freezegun", specifier = ">=1.2.0" }, + { name = "mypy", specifier = ">=1.16.1" }, { name = "pre-commit", specifier = ">=3.6.0" }, { name = "pytest", specifier = ">=8.3.5" }, { name = "pytest-asyncio", specifier = ">=0.23.0" }, + { name = "pytest-cov", specifier = ">=4.0.0" }, { name = "pytest-xdist", specifier = ">=3.5.0" }, { name = "ruff", specifier = ">=0.3.0" }, { name = "testcontainers", specifier = ">=3.7.0" }, @@ -276,6 +257,26 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335 }, ] +[[package]] +name = "coverage" +version = "7.9.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e7/e0/98670a80884f64578f0c22cd70c5e81a6e07b08167721c7487b4d70a7ca0/coverage-7.9.1.tar.gz", hash = "sha256:6cf43c78c4282708a28e466316935ec7489a9c487518a77fa68f716c67909cec", size = 813650 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/68/d9/7f66eb0a8f2fce222de7bdc2046ec41cb31fe33fb55a330037833fb88afc/coverage-7.9.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:a8de12b4b87c20de895f10567639c0797b621b22897b0af3ce4b4e204a743626", size = 212336 }, + { url = "https://files.pythonhosted.org/packages/20/20/e07cb920ef3addf20f052ee3d54906e57407b6aeee3227a9c91eea38a665/coverage-7.9.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5add197315a054e92cee1b5f686a2bcba60c4c3e66ee3de77ace6c867bdee7cb", size = 212571 }, + { url = "https://files.pythonhosted.org/packages/78/f8/96f155de7e9e248ca9c8ff1a40a521d944ba48bec65352da9be2463745bf/coverage-7.9.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:600a1d4106fe66f41e5d0136dfbc68fe7200a5cbe85610ddf094f8f22e1b0300", size = 246377 }, + { url = "https://files.pythonhosted.org/packages/3e/cf/1d783bd05b7bca5c10ded5f946068909372e94615a4416afadfe3f63492d/coverage-7.9.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2a876e4c3e5a2a1715a6608906aa5a2e0475b9c0f68343c2ada98110512ab1d8", size = 243394 }, + { url = "https://files.pythonhosted.org/packages/02/dd/e7b20afd35b0a1abea09fb3998e1abc9f9bd953bee548f235aebd2b11401/coverage-7.9.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:81f34346dd63010453922c8e628a52ea2d2ccd73cb2487f7700ac531b247c8a5", size = 245586 }, + { url = 
"https://files.pythonhosted.org/packages/4e/38/b30b0006fea9d617d1cb8e43b1bc9a96af11eff42b87eb8c716cf4d37469/coverage-7.9.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:888f8eee13f2377ce86d44f338968eedec3291876b0b8a7289247ba52cb984cd", size = 245396 }, + { url = "https://files.pythonhosted.org/packages/31/e4/4d8ec1dc826e16791f3daf1b50943e8e7e1eb70e8efa7abb03936ff48418/coverage-7.9.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:9969ef1e69b8c8e1e70d591f91bbc37fc9a3621e447525d1602801a24ceda898", size = 243577 }, + { url = "https://files.pythonhosted.org/packages/25/f4/b0e96c5c38e6e40ef465c4bc7f138863e2909c00e54a331da335faf0d81a/coverage-7.9.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:60c458224331ee3f1a5b472773e4a085cc27a86a0b48205409d364272d67140d", size = 244809 }, + { url = "https://files.pythonhosted.org/packages/8a/65/27e0a1fa5e2e5079bdca4521be2f5dabf516f94e29a0defed35ac2382eb2/coverage-7.9.1-cp312-cp312-win32.whl", hash = "sha256:5f646a99a8c2b3ff4c6a6e081f78fad0dde275cd59f8f49dc4eab2e394332e74", size = 214724 }, + { url = "https://files.pythonhosted.org/packages/9b/a8/d5b128633fd1a5e0401a4160d02fa15986209a9e47717174f99dc2f7166d/coverage-7.9.1-cp312-cp312-win_amd64.whl", hash = "sha256:30f445f85c353090b83e552dcbbdad3ec84c7967e108c3ae54556ca69955563e", size = 215535 }, + { url = "https://files.pythonhosted.org/packages/a3/37/84bba9d2afabc3611f3e4325ee2c6a47cd449b580d4a606b240ce5a6f9bf/coverage-7.9.1-cp312-cp312-win_arm64.whl", hash = "sha256:af41da5dca398d3474129c58cb2b106a5d93bbb196be0d307ac82311ca234342", size = 213904 }, + { url = "https://files.pythonhosted.org/packages/08/b8/7ddd1e8ba9701dea08ce22029917140e6f66a859427406579fd8d0ca7274/coverage-7.9.1-py3-none-any.whl", hash = "sha256:66b974b145aa189516b6bf2d8423e888b742517d37872f6ee4c5be0073bd9a3c", size = 204000 }, +] + [[package]] name = "cryptography" version = "45.0.3" @@ -687,6 +688,35 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/43/e3/7d92a15f894aa0c9c4b49b8ee9ac9850d6e63b03c9c32c0367a13ae62209/mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c", size = 536198 }, ] +[[package]] +name = "mypy" +version = "1.16.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "mypy-extensions" }, + { name = "pathspec" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/81/69/92c7fa98112e4d9eb075a239caa4ef4649ad7d441545ccffbd5e34607cbb/mypy-1.16.1.tar.gz", hash = "sha256:6bd00a0a2094841c5e47e7374bb42b83d64c527a502e3334e1173a0c24437bab", size = 3324747 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b4/d6/39482e5fcc724c15bf6280ff5806548c7185e0c090712a3736ed4d07e8b7/mypy-1.16.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:af4792433f09575d9eeca5c63d7d90ca4aeceda9d8355e136f80f8967639183d", size = 11066493 }, + { url = "https://files.pythonhosted.org/packages/e6/e5/26c347890efc6b757f4d5bb83f4a0cf5958b8cf49c938ac99b8b72b420a6/mypy-1.16.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:66df38405fd8466ce3517eda1f6640611a0b8e70895e2a9462d1d4323c5eb4b9", size = 10081687 }, + { url = "https://files.pythonhosted.org/packages/44/c7/b5cb264c97b86914487d6a24bd8688c0172e37ec0f43e93b9691cae9468b/mypy-1.16.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:44e7acddb3c48bd2713994d098729494117803616e116032af192871aed80b79", size = 11839723 }, + { url = 
"https://files.pythonhosted.org/packages/15/f8/491997a9b8a554204f834ed4816bda813aefda31cf873bb099deee3c9a99/mypy-1.16.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0ab5eca37b50188163fa7c1b73c685ac66c4e9bdee4a85c9adac0e91d8895e15", size = 12722980 }, + { url = "https://files.pythonhosted.org/packages/df/f0/2bd41e174b5fd93bc9de9a28e4fb673113633b8a7f3a607fa4a73595e468/mypy-1.16.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:dedb6229b2c9086247e21a83c309754b9058b438704ad2f6807f0d8227f6ebdd", size = 12903328 }, + { url = "https://files.pythonhosted.org/packages/61/81/5572108a7bec2c46b8aff7e9b524f371fe6ab5efb534d38d6b37b5490da8/mypy-1.16.1-cp312-cp312-win_amd64.whl", hash = "sha256:1f0435cf920e287ff68af3d10a118a73f212deb2ce087619eb4e648116d1fe9b", size = 9562321 }, + { url = "https://files.pythonhosted.org/packages/cf/d3/53e684e78e07c1a2bf7105715e5edd09ce951fc3f47cf9ed095ec1b7a037/mypy-1.16.1-py3-none-any.whl", hash = "sha256:5fc2ac4027d0ef28d6ba69a0343737a23c4d1b83672bf38d1fe237bdc0643b37", size = 2265923 }, +] + +[[package]] +name = "mypy-extensions" +version = "1.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/6e/371856a3fb9d31ca8dac321cda606860fa4548858c0cc45d9d1d4ca2628b/mypy_extensions-1.1.0.tar.gz", hash = "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558", size = 6343 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963 }, +] + [[package]] name = "narwhals" version = "1.35.0" @@ -970,6 +1000,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/29/d4/1244ab8edf173a10fd601f7e13b9566c1b525c4f365d6bee918e68381889/pandas-2.2.3-cp312-cp312-win_amd64.whl", hash = "sha256:59ef3764d0fe818125a5097d2ae867ca3fa64df032331b7e0917cf5d7bf66b13", size = 11504248 }, ] +[[package]] +name = "pathspec" +version = "0.12.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ca/bc/f35b8446f4531a7cb215605d100cd88b7ac6f44ab3fc94870c120ab3adbf/pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712", size = 51043 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cc/20/ff623b09d963f88bfde16306a54e12ee5ea43e9b597108672ff3a408aad6/pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08", size = 31191 }, +] + [[package]] name = "pillow" version = "11.2.1" @@ -1212,6 +1251,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/20/7f/338843f449ace853647ace35870874f69a764d251872ed1b4de9f234822c/pytest_asyncio-0.26.0-py3-none-any.whl", hash = "sha256:7b51ed894f4fbea1340262bdae5135797ebbe21d8638978e35d31c6d19f72fb0", size = 19694 }, ] +[[package]] +name = "pytest-cov" +version = "6.2.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "coverage" }, + { name = "pluggy" }, + { name = "pytest" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/18/99/668cade231f434aaa59bbfbf49469068d2ddd945000621d3d165d2e7dd7b/pytest_cov-6.2.1.tar.gz", hash = "sha256:25cc6cc0a5358204b8108ecedc51a9b57b34cc6b8c967cc2c01a4e00d8a67da2", size = 69432 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/bc/16/4ea354101abb1287856baa4af2732be351c7bee728065aed451b678153fd/pytest_cov-6.2.1-py3-none-any.whl", hash = "sha256:f5bc4c23f42f1cdd23c70b1dab1bbaef4fc505ba950d53e0081d0730dd7e86d5", size = 24644 }, +] + [[package]] name = "pytest-xdist" version = "3.6.1" @@ -1794,15 +1847,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5c/23/c7abc0ca0a1526a0774eca151daeb8de62ec457e77262b66b359c3c7679e/tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8", size = 347839 }, ] -[[package]] -name = "ulid-py" -version = "1.1.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/3b/53/d14a8ec344048e21431821cb49e9a6722384f982b889c2dd449428dbdcc1/ulid-py-1.1.0.tar.gz", hash = "sha256:dc6884be91558df077c3011b9fb0c87d1097cb8fc6534b11f310161afd5738f0", size = 22514 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/42/7c/a12c879fe6c2b136a718c142115ff99397fbf62b4929d970d58ae386d55f/ulid_py-1.1.0-py2.py3-none-any.whl", hash = "sha256:b56a0f809ef90d6020b21b89a87a48edc7c03aea80e5ed5174172e82d76e3987", size = 25753 }, -] - [[package]] name = "umap-learn" version = "0.5.7" From fdeb69e3a3362d80edfdcc2d497a99e058470cfd Mon Sep 17 00:00:00 2001 From: Andrew Brookins Date: Tue, 17 Jun 2025 16:01:26 -0700 Subject: [PATCH 08/12] More client enhancements to tool-based access --- TASK_MEMORY.md | 26 --- .../agent_memory_client/client.py | 3 +- agent_memory_server/extraction.py | 2 +- agent_memory_server/mcp.py | 2 +- agent_memory_server/migrations.py | 2 +- client-tools.md | 162 ++++++++++++++++++ 6 files changed, 166 insertions(+), 31 deletions(-) delete mode 100644 TASK_MEMORY.md create mode 100644 client-tools.md diff --git a/TASK_MEMORY.md b/TASK_MEMORY.md deleted file mode 100644 index 5b54042..0000000 --- a/TASK_MEMORY.md +++ /dev/null @@ -1,26 +0,0 @@ -# Task Memory - -**Created:** 2025-06-13 16:34:19 -**Branch:** feature/separate-client-codebase - -## Requirements - -Fix the errors generated with the command 'uv run mypy agent_memory_client' - -## Development Notes - -*Update this section as you work on the task. Include:* -- *Progress updates* -- *Key decisions made* -- *Challenges encountered* -- *Solutions implemented* -- *Files modified* -- *Testing notes* - -### Work Log - -- [2025-06-13 16:34:19] Task setup completed, TASK_MEMORY.md created - ---- - -*This file serves as your working memory for this task. 
Keep it updated as you progress through the implementation.* diff --git a/agent-memory-client/agent_memory_client/client.py b/agent-memory-client/agent_memory_client/client.py index 5cf736c..f9e4ffe 100644 --- a/agent-memory-client/agent_memory_client/client.py +++ b/agent-memory-client/agent_memory_client/client.py @@ -40,7 +40,6 @@ WorkingMemoryResponse, ) - # === Tool Call Type Definitions === @@ -820,7 +819,7 @@ async def search_memory_tool( result = await client.search_memory_tool(**args) ``` """ - from .filters import Topics, Entities, MemoryType + from .filters import Entities, MemoryType, Topics # Convert simple parameters to filter objects topics_filter = Topics(any=topics) if topics else None diff --git a/agent_memory_server/extraction.py b/agent_memory_server/extraction.py index bed9682..e2e1808 100644 --- a/agent_memory_server/extraction.py +++ b/agent_memory_server/extraction.py @@ -2,6 +2,7 @@ import os from typing import Any +import ulid from bertopic import BERTopic from redis.asyncio.client import Redis from redisvl.query.filter import Tag @@ -9,7 +10,6 @@ from tenacity.asyncio import AsyncRetrying from tenacity.stop import stop_after_attempt from transformers import AutoModelForTokenClassification, AutoTokenizer, pipeline -import ulid from agent_memory_server.config import settings from agent_memory_server.llms import ( diff --git a/agent_memory_server/mcp.py b/agent_memory_server/mcp.py index 9d171ff..05bae12 100644 --- a/agent_memory_server/mcp.py +++ b/agent_memory_server/mcp.py @@ -2,8 +2,8 @@ import os from typing import Any -from mcp.server.fastmcp import FastMCP as _FastMCPBase import ulid +from mcp.server.fastmcp import FastMCP as _FastMCPBase from agent_memory_server.api import ( create_long_term_memory as core_create_long_term_memory, diff --git a/agent_memory_server/migrations.py b/agent_memory_server/migrations.py index b2fe381..6e0b1f3 100644 --- a/agent_memory_server/migrations.py +++ b/agent_memory_server/migrations.py @@ -2,8 +2,8 @@ Simplest possible migrations you could have. """ -from redis.asyncio import Redis import ulid +from redis.asyncio import Redis from agent_memory_server.logging import get_logger from agent_memory_server.long_term_memory import generate_memory_hash diff --git a/client-tools.md b/client-tools.md new file mode 100644 index 0000000..79f91c7 --- /dev/null +++ b/client-tools.md @@ -0,0 +1,162 @@ +# Task Memory + +**Created:** 2025-06-13 16:34:19 +**Branch:** feature/separate-client-codebase + +## Requirements + +Fix the errors generated with the command 'uv run mypy agent_memory_client' + +## Development Notes + +*Update this section as you work on the task. Include:* +- *Progress updates* +- *Key decisions made* +- *Challenges encountered* +- *Solutions implemented* +- *Files modified* +- *Testing notes* + +### Work Log + +- [2025-06-13 16:34:19] Task setup completed, TASK_MEMORY.md created + +#### [2025-06-13 17:00:00] Completed mypy error fixes and namespace refactoring + +**Issues Addressed:** +1. **Fixed mypy type errors in agent_memory_client:** Added py.typed marker file to indicate type information availability +2. **Continued namespace refactoring in travel_agent.py:** Enhanced user ID integration into namespaces +3. **Resolved import and type annotation issues:** Fixed all type-related errors in the travel agent example + +**Key Changes Made:** + +1. **Added py.typed marker:** Created `agent-memory-client/agent_memory_client/py.typed` to resolve import stub issues + +2. 
**Enhanced TravelAgent class with proper namespace handling:** + - Added `_get_namespace(user_id)` helper method for consistent namespace generation + - Refactored client management to support multiple users with per-user clients + - Updated `get_client()` to maintain separate `MemoryAPIClient` instances per user + - Fixed `cleanup()` method to properly close all client connections + +3. **Fixed type annotations throughout travel_agent.py:** + - Corrected `MemoryType` usage to use `MemoryTypeEnum` directly + - Added proper imports for `Namespace` and `MemoryRecordResult` filter types + - Updated method signatures to use correct return types (`MemoryRecordResult` vs `MemoryRecord`) + - Fixed namespace parameter usage in search methods to use `Namespace(eq=namespace_string)` + +4. **Ensured consistent namespace usage:** + - All memory operations now explicitly use the `travel_agent:{user_id}` namespace pattern + - Working memory operations correctly set namespace in memory objects + - Long-term memory search and storage operations use proper namespace filters + +**Files Modified:** +- `agent-memory-client/agent_memory_client/py.typed` (created) +- `examples/travel_agent.py` (extensively refactored) + +**Testing:** +- ✅ `uv run mypy agent-memory-client/agent_memory_client` - Success: no issues found +- ✅ `uv run mypy examples/travel_agent.py` - Success: no issues found + +**Key Decisions:** +- Chose to maintain separate client instances per user for better isolation and namespace management +- Used explicit `Namespace` filter objects rather than relying on default namespace configuration +- Maintained backward compatibility with existing method signatures while fixing type annotations + +#### [2025-06-13 17:20:00] Removed redundant features that memory server already handles + +**Issues Addressed:** +1. **Removed manual summarization:** Eliminated conversation summarization logic since memory server handles this automatically +2. **Removed manual memory extraction:** Eliminated LLM-based memory extraction since memory server provides automatic extraction +3. **Removed duplicate checking:** Eliminated manual similar memory checking since memory server handles deduplication +4. **Removed manual memory retrieval and augmentation:** Simplified to rely on memory server's built-in capabilities + +**Key Simplifications:** + +1. **Removed summarization infrastructure:** + - Deleted `MESSAGE_SUMMARIZATION_THRESHOLD` constant + - Removed `_summarize_conversation()` method + - Eliminated summarization logic from `_add_message_to_working_memory()` + - Removed `summarizer` LLM instance + +2. **Removed manual memory management:** + - Deleted `Memory` and `Memories` Pydantic models + - Removed `MemoryStrategy` enum and related strategy logic + - Eliminated `_extract_memories_from_conversation()` method + - Removed `_store_long_term_memory()` method + - Deleted `_similar_memory_exists()` duplicate checking + - Removed `_retrieve_relevant_memories()` and `_augment_query_with_memories()` methods + +3. **Simplified class interface:** + - Updated `TravelAgent.__init__()` to remove strategy parameter + - Simplified `_setup_llms()` to only include main conversation LLM + - Streamlined `process_user_input()` to focus on core conversation flow + - Updated `_generate_response()` to work with basic user input instead of augmented queries + +4. 
**Cleaned up dependencies:** + - Removed unused imports: `Enum`, `BaseModel`, filter classes, memory model classes + - Simplified import structure to only include essential components + - Removed command-line strategy argument from main function + +**Rationale:** +- Modern memory servers typically provide automatic conversation summarization when needed +- Memory extraction, deduplication, and semantic retrieval are core memory server features +- Simplifying the travel agent to focus on conversation flow while delegating memory management to the server +- Reduces code complexity and maintenance burden while leveraging server capabilities + +**Files Modified:** +- `examples/travel_agent.py` (significantly simplified - removed ~200 lines of redundant code) + +**Testing:** +- ✅ `uv run mypy examples/travel_agent.py` - Success: no issues found +- ✅ Travel agent imports and instantiates successfully after simplification + +**Key Decisions:** +- Prioritized simplicity and delegation to memory server over manual memory management +- Maintained core conversation functionality while removing redundant features +- Kept namespace management and multi-user support as these are application-specific concerns + +#### [2025-06-13 17:30:00] Simplified client management to single client with explicit namespaces + +**Issues Addressed:** +1. **Overcomplicated client management:** Multiple clients per user was unnecessarily complex and resource-intensive +2. **Inefficient resource usage:** One client per user consumed more memory and connections than needed +3. **Complex lifecycle management:** Managing multiple client lifecycles was error-prone + +**Key Simplifications:** + +1. **Replaced per-user clients with single client:** + - Changed from `self._memory_clients: dict[str, MemoryAPIClient]` to `self._memory_client: MemoryAPIClient | None` + - Simplified `get_client()` method to return single client instance without user parameter + - Removed user-specific client initialization and storage logic + +2. **Explicit namespace management:** + - Removed `default_namespace` from client configuration + - Always pass namespace explicitly using `self._get_namespace(user_id)` in all operations + - Maintained namespace isolation while using shared client + +3. **Simplified cleanup:** + - Changed from iterating over multiple clients to single client cleanup + - Reduced cleanup complexity and potential for resource leaks + +**Benefits:** +- **Memory efficiency:** Single client instead of multiple per-user clients +- **Connection pooling:** Better HTTP connection reuse across users +- **Simpler lifecycle:** One client to initialize and cleanup +- **Maintained isolation:** User namespaces still properly isolated via explicit namespace parameters +- **Cleaner code:** Less complexity in client management logic + +**Files Modified:** +- `examples/travel_agent.py` (simplified client management) + +**Testing:** +- ✅ `uv run mypy examples/travel_agent.py` - Success: no issues found +- ✅ Single-client travel agent imports and instantiates successfully + +**Key Decisions:** +- Prioritized efficiency and simplicity over perceived per-user client isolation +- Maintained namespace-based user isolation through explicit parameters +- Leveraged HTTP client connection pooling for better resource utilization + +--- + +*This file serves as your working memory for this task. 
Keep it updated as you progress through the implementation.* From 6a5f3ceac8d8320831dc2f5c9f46a5840c96bf7c Mon Sep 17 00:00:00 2001 From: Andrew Brookins Date: Tue, 17 Jun 2025 16:17:16 -0700 Subject: [PATCH 09/12] Fix mypy issues --- .../agent_memory_client/client.py | 207 +----------------- .../agent_memory_client/models.py | 2 +- agent-memory-client/tests/test_basic.py | 1 - agent-memory-client/tests/test_client.py | 29 --- agent_memory_server/api.py | 2 +- agent_memory_server/extraction.py | 2 +- agent_memory_server/long_term_memory.py | 8 +- agent_memory_server/mcp.py | 4 +- agent_memory_server/migrations.py | 4 +- agent_memory_server/models.py | 2 +- tests/conftest.py | 4 +- tests/test_client_enhancements.py | 29 --- tests/test_long_term_memory.py | 4 +- 13 files changed, 25 insertions(+), 273 deletions(-) diff --git a/agent-memory-client/agent_memory_client/client.py b/agent-memory-client/agent_memory_client/client.py index f9e4ffe..168da53 100644 --- a/agent-memory-client/agent_memory_client/client.py +++ b/agent-memory-client/agent_memory_client/client.py @@ -34,6 +34,7 @@ HealthCheckResponse, MemoryRecord, MemoryRecordResults, + MemoryTypeEnum, ModelNameLiteral, SessionListResponse, WorkingMemory, @@ -442,7 +443,7 @@ async def add_memories_to_working_memory( # Auto-generate IDs for memories that don't have them for memory in final_memories: if not memory.id: - memory.id = str(ulid.new()) + memory.id = str(ulid.ULID()) # Create new working memory with the memories working_memory = WorkingMemory( @@ -617,136 +618,10 @@ async def search_long_term_memory( exclude_none=True, mode="json" ) if user_id: - payload["user_id"] = user_id.model_dump(exclude_none=True) - if memory_type: - payload["memory_type"] = memory_type.model_dump(exclude_none=True) - if distance_threshold is not None: - payload["distance_threshold"] = distance_threshold - - try: - response = await self._client.post( - "/v1/long-term-memory/search", - json=payload, - ) - response.raise_for_status() - return MemoryRecordResults(**response.json()) - except httpx.HTTPStatusError as e: - self._handle_http_error(e.response) - raise - - async def search_memories( - self, - text: str, - session_id: SessionId | dict[str, Any] | None = None, - namespace: Namespace | dict[str, Any] | None = None, - topics: Topics | dict[str, Any] | None = None, - entities: Entities | dict[str, Any] | None = None, - created_at: CreatedAt | dict[str, Any] | None = None, - last_accessed: LastAccessed | dict[str, Any] | None = None, - user_id: UserId | dict[str, Any] | None = None, - distance_threshold: float | None = None, - memory_type: MemoryType | dict[str, Any] | None = None, - limit: int = 10, - offset: int = 0, - ) -> MemoryRecordResults: - """ - Search across all memory types (working memory and long-term memory). - - This method searches both working memory (ephemeral, session-scoped) and - long-term memory (persistent, indexed) to provide comprehensive results. - - For working memory: - - Uses simple text matching - - Searches across all sessions (unless session_id filter is provided) - - Returns memories that haven't been promoted to long-term storage - - For long-term memory: - - Uses semantic vector search - - Includes promoted memories from working memory - - Supports advanced filtering by topics, entities, etc. 
- - Args: - text: Search query text for semantic similarity - session_id: Optional session ID filter - namespace: Optional namespace filter - topics: Optional topics filter - entities: Optional entities filter - created_at: Optional creation date filter - last_accessed: Optional last accessed date filter - user_id: Optional user ID filter - distance_threshold: Optional distance threshold for search results - memory_type: Optional memory type filter - limit: Maximum number of results to return (default: 10) - offset: Offset for pagination (default: 0) - - Returns: - MemoryRecordResults with matching memories from both memory types - - Raises: - MemoryServerError: If the request fails - - Example: - ```python - # Search for user preferences with topic filtering - from .filters import Topics - - results = await client.search_memories( - text="user prefers dark mode", - topics=Topics(any=["preferences", "ui"]), - limit=5 - ) - - for memory in results.memories: - print(f"Found: {memory.text}") - ``` - """ - # Convert dictionary filters to their proper filter objects if needed - if isinstance(session_id, dict): - session_id = SessionId(**session_id) - if isinstance(namespace, dict): - namespace = Namespace(**namespace) - if isinstance(topics, dict): - topics = Topics(**topics) - if isinstance(entities, dict): - entities = Entities(**entities) - if isinstance(created_at, dict): - created_at = CreatedAt(**created_at) - if isinstance(last_accessed, dict): - last_accessed = LastAccessed(**last_accessed) - if isinstance(user_id, dict): - user_id = UserId(**user_id) - if isinstance(memory_type, dict): - memory_type = MemoryType(**memory_type) - - # Apply default namespace if needed and no namespace filter specified - if namespace is None and self.config.default_namespace is not None: - namespace = Namespace(eq=self.config.default_namespace) - - payload = { - "text": text, - "limit": limit, - "offset": offset, - } - - # Add filters if provided - if session_id: - payload["session_id"] = session_id.model_dump(exclude_none=True) - if namespace: - payload["namespace"] = namespace.model_dump(exclude_none=True) - if topics: - payload["topics"] = topics.model_dump(exclude_none=True) - if entities: - payload["entities"] = entities.model_dump(exclude_none=True) - if created_at: - payload["created_at"] = created_at.model_dump( - exclude_none=True, mode="json" - ) - if last_accessed: - payload["last_accessed"] = last_accessed.model_dump( - exclude_none=True, mode="json" - ) - if user_id: - payload["user_id"] = user_id.model_dump(exclude_none=True) + if isinstance(user_id, dict): + payload["user_id"] = user_id + else: + payload["user_id"] = user_id.model_dump(exclude_none=True) if memory_type: payload["memory_type"] = memory_type.model_dump(exclude_none=True) if distance_threshold is not None: @@ -1076,7 +951,7 @@ async def add_memory_tool( # Create memory record memory = ClientMemoryRecord( text=text, - memory_type=memory_type, + memory_type=MemoryTypeEnum(memory_type), topics=topics, entities=entities, namespace=namespace or self.config.default_namespace, @@ -1111,7 +986,7 @@ async def update_memory_data_tool( self, session_id: str, data: dict[str, Any], - merge_strategy: str = "merge", + merge_strategy: Literal["replace", "merge", "deep_merge"] = "merge", namespace: str | None = None, user_id: str | None = None, ) -> dict[str, Any]: @@ -1997,70 +1872,6 @@ async def search_all_long_term_memories( offset += batch_size - async def search_all_memories( - self, - text: str, - session_id: SessionId | dict[str, Any] | 
None = None, - namespace: Namespace | dict[str, Any] | None = None, - topics: Topics | dict[str, Any] | None = None, - entities: Entities | dict[str, Any] | None = None, - created_at: CreatedAt | dict[str, Any] | None = None, - last_accessed: LastAccessed | dict[str, Any] | None = None, - user_id: UserId | dict[str, Any] | None = None, - distance_threshold: float | None = None, - memory_type: MemoryType | dict[str, Any] | None = None, - batch_size: int = 50, - ) -> AsyncIterator[MemoryRecord]: - """ - Auto-paginating version of unified memory search. - - Searches both working memory and long-term memory with automatic pagination. - - Args: - text: Search query text - session_id: Optional session ID filter - namespace: Optional namespace filter - topics: Optional topics filter - entities: Optional entities filter - created_at: Optional creation date filter - last_accessed: Optional last accessed date filter - user_id: Optional user ID filter - distance_threshold: Optional distance threshold - memory_type: Optional memory type filter - batch_size: Number of results to fetch per API call - - Yields: - Individual memory records from all result pages - """ - offset = 0 - while True: - results = await self.search_memories( - text=text, - session_id=session_id, - namespace=namespace, - topics=topics, - entities=entities, - created_at=created_at, - last_accessed=last_accessed, - user_id=user_id, - distance_threshold=distance_threshold, - memory_type=memory_type, - limit=batch_size, - offset=offset, - ) - - if not results.memories: - break - - for memory in results.memories: - yield memory - - # If we got fewer results than batch_size, we've reached the end - if len(results.memories) < batch_size: - break - - offset += batch_size - def validate_memory_record(self, memory: ClientMemoryRecord | MemoryRecord) -> None: """ Validate memory record before sending to server. 
@@ -2237,7 +2048,7 @@ async def append_messages_to_working_memory( converted_existing_messages.append(msg) else: # Fallback for any other message type - convert to string content - converted_existing_messages.append( + converted_existing_messages.append( # type: ignore {"role": "user", "content": str(msg)} ) diff --git a/agent-memory-client/agent_memory_client/models.py b/agent-memory-client/agent_memory_client/models.py index a21325d..965e997 100644 --- a/agent-memory-client/agent_memory_client/models.py +++ b/agent-memory-client/agent_memory_client/models.py @@ -122,7 +122,7 @@ class ClientMemoryRecord(MemoryRecord): """A memory record with a client-provided ID""" id: str = Field( - default_factory=lambda: str(ulid.new()), + default_factory=lambda: str(ulid.ULID()), description="Client-provided ID generated by the client (ULID)", ) diff --git a/agent-memory-client/tests/test_basic.py b/agent-memory-client/tests/test_basic.py index b69cbd8..7f26ca3 100644 --- a/agent-memory-client/tests/test_basic.py +++ b/agent-memory-client/tests/test_basic.py @@ -83,7 +83,6 @@ def test_enhanced_methods(): # Test pagination assert hasattr(client, "search_all_long_term_memories") - assert hasattr(client, "search_all_memories") # Test enhanced convenience methods assert hasattr(client, "update_working_memory_data") diff --git a/agent-memory-client/tests/test_client.py b/agent-memory-client/tests/test_client.py index ee41eb6..d9bc309 100644 --- a/agent-memory-client/tests/test_client.py +++ b/agent-memory-client/tests/test_client.py @@ -297,35 +297,6 @@ async def test_search_all_long_term_memories(self, enhanced_test_client): # Should have made 3 API calls assert mock_search.call_count == 3 - @pytest.mark.asyncio - async def test_search_all_memories(self, enhanced_test_client): - """Test auto-paginating unified memory search.""" - # Similar test for unified search - response = MemoryRecordResults( - total=25, - memories=[ - MemoryRecordResult( - id=f"memory-{i}", - text=f"Memory text {i}", - dist=0.1, - ) - for i in range(25) - ], - next_offset=None, - ) - - with patch.object(enhanced_test_client, "search_memories") as mock_search: - mock_search.return_value = response - - all_memories = [] - async for memory in enhanced_test_client.search_all_memories( - text="test query", batch_size=50 - ): - all_memories.append(memory) - - assert len(all_memories) == 25 - assert mock_search.call_count == 1 - class TestClientSideValidation: """Tests for client-side validation methods.""" diff --git a/agent_memory_server/api.py b/agent_memory_server/api.py index 2f594ef..bdaa2db 100644 --- a/agent_memory_server/api.py +++ b/agent_memory_server/api.py @@ -339,7 +339,7 @@ async def put_working_memory( memories = [ MemoryRecord( - id=str(ulid.new()), + id=str(ulid.ULID()), session_id=session_id, text=f"{msg.role}: {msg.content}", namespace=updated_memory.namespace, diff --git a/agent_memory_server/extraction.py b/agent_memory_server/extraction.py index e2e1808..6645e9a 100644 --- a/agent_memory_server/extraction.py +++ b/agent_memory_server/extraction.py @@ -333,7 +333,7 @@ async def extract_discrete_memories( if discrete_memories: long_term_memories = [ MemoryRecord( - id_=str(ulid.new()), + id_=str(ulid.ULID()), text=new_memory["text"], memory_type=new_memory.get("type", "episodic"), topics=new_memory.get("topics", []), diff --git a/agent_memory_server/long_term_memory.py b/agent_memory_server/long_term_memory.py index a872a15..f3f6990 100644 --- a/agent_memory_server/long_term_memory.py +++ 
b/agent_memory_server/long_term_memory.py @@ -244,7 +244,7 @@ async def merge_memories_with_llm(memories: list[dict], llm_client: Any = None) # Create the merged memory merged_memory = { "text": merged_text.strip(), - "id_": str(ulid.new()), + "id_": str(ulid.ULID()), "user_id": user_id, "session_id": session_id, "namespace": namespace, @@ -664,7 +664,7 @@ async def index_long_term_memories( async with redis.pipeline(transaction=False) as pipe: for idx, vector in enumerate(embeddings): memory = processed_memories[idx] - id_ = memory.id if memory.id else str(ulid.new()) + id_ = memory.id if memory.id else str(ulid.ULID()) key = Keys.memory_key(id_, memory.namespace) # Generate memory hash for the memory @@ -1426,7 +1426,7 @@ async def deduplicate_by_semantic_search( # Convert back to LongTermMemory merged_memory_obj = MemoryRecord( - id=memory.id or str(ulid.new()), + id=memory.id or str(ulid.ULID()), text=merged_memory["text"], user_id=merged_memory["user_id"], session_id=merged_memory["session_id"], @@ -1646,7 +1646,7 @@ async def extract_memories_from_messages( # Create a new memory record from the extraction extracted_memory = MemoryRecord( - id=str(ulid.new()), # Server-generated ID + id=str(ulid.ULID()), # Server-generated ID text=memory_data["text"], memory_type=memory_data.get("type", "semantic"), topics=memory_data.get("topics", []), diff --git a/agent_memory_server/mcp.py b/agent_memory_server/mcp.py index 05bae12..7deeccb 100644 --- a/agent_memory_server/mcp.py +++ b/agent_memory_server/mcp.py @@ -690,7 +690,7 @@ async def set_working_memory( # Handle both MemoryRecord objects and dict inputs if isinstance(memory, MemoryRecord): # Already a MemoryRecord object, ensure it has an ID - memory_id = memory.id or str(ulid.new()) + memory_id = memory.id or str(ulid.ULID()) processed_memory = memory.model_copy( update={ "id": memory_id, @@ -701,7 +701,7 @@ async def set_working_memory( # Dictionary input, convert to MemoryRecord memory_dict = dict(memory) if not memory_dict.get("id"): - memory_dict["id"] = str(ulid.new()) + memory_dict["id"] = str(ulid.ULID()) memory_dict["persisted_at"] = None processed_memory = MemoryRecord(**memory_dict) diff --git a/agent_memory_server/migrations.py b/agent_memory_server/migrations.py index 6e0b1f3..a1c1495 100644 --- a/agent_memory_server/migrations.py +++ b/agent_memory_server/migrations.py @@ -98,7 +98,7 @@ async def migrate_add_discrete_memory_extracted_2(redis: Redis | None = None) -> id_ = await redis.hget(name=key, key="id_") # type: ignore if not id_: logger.info("Updating memory with no ID to set ID") - await redis.hset(name=key, key="id_", value=str(ulid.new())) # type: ignore + await redis.hset(name=key, key="id_", value=str(ulid.ULID())) # type: ignore # extracted: bytes | None = await redis.hget( # name=key, key="discrete_memory_extracted" # ) # type: ignore @@ -126,7 +126,7 @@ async def migrate_add_memory_type_3(redis: Redis | None = None) -> None: id_ = await redis.hget(name=key, key="id_") # type: ignore if not id_: logger.info("Updating memory with no ID to set ID") - await redis.hset(name=key, key="id_", value=str(ulid.new())) # type: ignore + await redis.hset(name=key, key="id_", value=str(ulid.ULID())) # type: ignore memory_type: bytes | None = await redis.hget(name=key, key="memory_type") # type: ignore if not memory_type: await redis.hset(name=key, key="memory_type", value="message") # type: ignore diff --git a/agent_memory_server/models.py b/agent_memory_server/models.py index 5e16c08..10357e1 100644 --- 
a/agent_memory_server/models.py +++ b/agent_memory_server/models.py @@ -143,7 +143,7 @@ class ClientMemoryRecord(MemoryRecord): """A memory record with a client-provided ID""" id: str = Field( - default_factory=lambda: str(ulid.new()), + default_factory=lambda: str(ulid.ULID()), description="Client-provided ID for deduplication and overwrites", ) diff --git a/tests/conftest.py b/tests/conftest.py index 76fc23b..258e5fc 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -144,7 +144,7 @@ async def session(use_test_redis_connection, async_redis_client): long_term_memories = [] for msg in messages: memory = MemoryRecord( - id=str(ulid.new()), + id=str(ulid.ULID()), text=f"{msg.role}: {msg.content}", session_id=session_id, namespace=namespace, @@ -163,7 +163,7 @@ async def session(use_test_redis_connection, async_redis_client): async with use_test_redis_connection.pipeline(transaction=False) as pipe: for idx, vector in enumerate(embeddings): memory = long_term_memories[idx] - id_ = memory.id if memory.id else str(ulid.new()) + id_ = memory.id if memory.id else str(ulid.ULID()) key = Keys.memory_key(id_, memory.namespace) # Generate memory hash for the memory diff --git a/tests/test_client_enhancements.py b/tests/test_client_enhancements.py index e5154e0..d7fa483 100644 --- a/tests/test_client_enhancements.py +++ b/tests/test_client_enhancements.py @@ -303,35 +303,6 @@ async def test_search_all_long_term_memories(self, enhanced_test_client): # Should have made 3 API calls assert mock_search.call_count == 3 - @pytest.mark.asyncio - async def test_search_all_memories(self, enhanced_test_client): - """Test auto-paginating unified memory search.""" - # Similar test for unified search - response = MemoryRecordResults( - total=25, - memories=[ - MemoryRecordResult( - id=f"memory-{i}", - text=f"Memory text {i}", - dist=0.1, - ) - for i in range(25) - ], - next_offset=None, - ) - - with patch.object(enhanced_test_client, "search_memories") as mock_search: - mock_search.return_value = response - - all_memories = [] - async for memory in enhanced_test_client.search_all_memories( - text="test query", batch_size=50 - ): - all_memories.append(memory) - - assert len(all_memories) == 25 - assert mock_search.call_count == 1 - class TestClientSideValidation: """Tests for client-side validation methods.""" diff --git a/tests/test_long_term_memory.py b/tests/test_long_term_memory.py index 5081d39..7bf832d 100644 --- a/tests/test_long_term_memory.py +++ b/tests/test_long_term_memory.py @@ -108,7 +108,7 @@ def __init__(self, docs): mock_query.return_value = [ Document( id=b"doc1", - id_=str(ulid.new()), + id_=str(ulid.ULID()), text=b"Hello, world!", vector_distance=0.25, created_at=mock_now, @@ -121,7 +121,7 @@ def __init__(self, docs): ), Document( id=b"doc2", - id_=str(ulid.new()), + id_=str(ulid.ULID()), text=b"Hi there!", vector_distance=0.75, created_at=mock_now, From 4ca3956bc4daecb794c447f1da39d63712f6a58c Mon Sep 17 00:00:00 2001 From: Andrew Brookins Date: Tue, 17 Jun 2025 16:23:03 -0700 Subject: [PATCH 10/12] Run trailing whitespace... 
--- .../agent_memory_client/py.typed | 1 - client-tools.md | 162 ------------------ examples/README.md | 4 +- examples/travel_agent.py | 12 +- 4 files changed, 8 insertions(+), 171 deletions(-) delete mode 100644 client-tools.md diff --git a/agent-memory-client/agent_memory_client/py.typed b/agent-memory-client/agent_memory_client/py.typed index 0519ecb..e69de29 100644 --- a/agent-memory-client/agent_memory_client/py.typed +++ b/agent-memory-client/agent_memory_client/py.typed @@ -1 +0,0 @@ - \ No newline at end of file diff --git a/client-tools.md b/client-tools.md deleted file mode 100644 index 79f91c7..0000000 --- a/client-tools.md +++ /dev/null @@ -1,162 +0,0 @@ -# Task Memory - -**Created:** 2025-06-13 16:34:19 -**Branch:** feature/separate-client-codebase - -## Requirements - -Fix the errors generated with the command 'uv run mypy agent_memory_client' - -## Development Notes - -*Update this section as you work on the task. Include:* -- *Progress updates* -- *Key decisions made* -- *Challenges encountered* -- *Solutions implemented* -- *Files modified* -- *Testing notes* - -### Work Log - -- [2025-06-13 16:34:19] Task setup completed, TASK_MEMORY.md created - -#### [2025-06-13 17:00:00] Completed mypy error fixes and namespace refactoring - -**Issues Addressed:** -1. **Fixed mypy type errors in agent_memory_client:** Added py.typed marker file to indicate type information availability -2. **Continued namespace refactoring in travel_agent.py:** Enhanced user ID integration into namespaces -3. **Resolved import and type annotation issues:** Fixed all type-related errors in the travel agent example - -**Key Changes Made:** - -1. **Added py.typed marker:** Created `agent-memory-client/agent_memory_client/py.typed` to resolve import stub issues - -2. **Enhanced TravelAgent class with proper namespace handling:** - - Added `_get_namespace(user_id)` helper method for consistent namespace generation - - Refactored client management to support multiple users with per-user clients - - Updated `get_client()` to maintain separate `MemoryAPIClient` instances per user - - Fixed `cleanup()` method to properly close all client connections - -3. **Fixed type annotations throughout travel_agent.py:** - - Corrected `MemoryType` usage to use `MemoryTypeEnum` directly - - Added proper imports for `Namespace` and `MemoryRecordResult` filter types - - Updated method signatures to use correct return types (`MemoryRecordResult` vs `MemoryRecord`) - - Fixed namespace parameter usage in search methods to use `Namespace(eq=namespace_string)` - -4. 
**Ensured consistent namespace usage:** - - All memory operations now explicitly use the `travel_agent:{user_id}` namespace pattern - - Working memory operations correctly set namespace in memory objects - - Long-term memory search and storage operations use proper namespace filters - -**Files Modified:** -- `agent-memory-client/agent_memory_client/py.typed` (created) -- `examples/travel_agent.py` (extensively refactored) - -**Testing:** -- ✅ `uv run mypy agent-memory-client/agent_memory_client` - Success: no issues found -- ✅ `uv run mypy examples/travel_agent.py` - Success: no issues found - -**Key Decisions:** -- Chose to maintain separate client instances per user for better isolation and namespace management -- Used explicit `Namespace` filter objects rather than relying on default namespace configuration -- Maintained backward compatibility with existing method signatures while fixing type annotations - -#### [2025-06-13 17:20:00] Removed redundant features that memory server already handles - -**Issues Addressed:** -1. **Removed manual summarization:** Eliminated conversation summarization logic since memory server handles this automatically -2. **Removed manual memory extraction:** Eliminated LLM-based memory extraction since memory server provides automatic extraction -3. **Removed duplicate checking:** Eliminated manual similar memory checking since memory server handles deduplication -4. **Removed manual memory retrieval and augmentation:** Simplified to rely on memory server's built-in capabilities - -**Key Simplifications:** - -1. **Removed summarization infrastructure:** - - Deleted `MESSAGE_SUMMARIZATION_THRESHOLD` constant - - Removed `_summarize_conversation()` method - - Eliminated summarization logic from `_add_message_to_working_memory()` - - Removed `summarizer` LLM instance - -2. **Removed manual memory management:** - - Deleted `Memory` and `Memories` Pydantic models - - Removed `MemoryStrategy` enum and related strategy logic - - Eliminated `_extract_memories_from_conversation()` method - - Removed `_store_long_term_memory()` method - - Deleted `_similar_memory_exists()` duplicate checking - - Removed `_retrieve_relevant_memories()` and `_augment_query_with_memories()` methods - -3. **Simplified class interface:** - - Updated `TravelAgent.__init__()` to remove strategy parameter - - Simplified `_setup_llms()` to only include main conversation LLM - - Streamlined `process_user_input()` to focus on core conversation flow - - Updated `_generate_response()` to work with basic user input instead of augmented queries - -4. 
**Cleaned up dependencies:** - - Removed unused imports: `Enum`, `BaseModel`, filter classes, memory model classes - - Simplified import structure to only include essential components - - Removed command-line strategy argument from main function - -**Rationale:** -- Modern memory servers typically provide automatic conversation summarization when needed -- Memory extraction, deduplication, and semantic retrieval are core memory server features -- Simplifying the travel agent to focus on conversation flow while delegating memory management to the server -- Reduces code complexity and maintenance burden while leveraging server capabilities - -**Files Modified:** -- `examples/travel_agent.py` (significantly simplified - removed ~200 lines of redundant code) - -**Testing:** -- ✅ `uv run mypy examples/travel_agent.py` - Success: no issues found -- ✅ Travel agent imports and instantiates successfully after simplification - -**Key Decisions:** -- Prioritized simplicity and delegation to memory server over manual memory management -- Maintained core conversation functionality while removing redundant features -- Kept namespace management and multi-user support as these are application-specific concerns - -#### [2025-06-13 17:30:00] Simplified client management to single client with explicit namespaces - -**Issues Addressed:** -1. **Overcomplicated client management:** Multiple clients per user was unnecessarily complex and resource-intensive -2. **Inefficient resource usage:** One client per user consumed more memory and connections than needed -3. **Complex lifecycle management:** Managing multiple client lifecycles was error-prone - -**Key Simplifications:** - -1. **Replaced per-user clients with single client:** - - Changed from `self._memory_clients: dict[str, MemoryAPIClient]` to `self._memory_client: MemoryAPIClient | None` - - Simplified `get_client()` method to return single client instance without user parameter - - Removed user-specific client initialization and storage logic - -2. **Explicit namespace management:** - - Removed `default_namespace` from client configuration - - Always pass namespace explicitly using `self._get_namespace(user_id)` in all operations - - Maintained namespace isolation while using shared client - -3. **Simplified cleanup:** - - Changed from iterating over multiple clients to single client cleanup - - Reduced cleanup complexity and potential for resource leaks - -**Benefits:** -- **Memory efficiency:** Single client instead of multiple per-user clients -- **Connection pooling:** Better HTTP connection reuse across users -- **Simpler lifecycle:** One client to initialize and cleanup -- **Maintained isolation:** User namespaces still properly isolated via explicit namespace parameters -- **Cleaner code:** Less complexity in client management logic - -**Files Modified:** -- `examples/travel_agent.py` (simplified client management) - -**Testing:** -- ✅ `uv run mypy examples/travel_agent.py` - Success: no issues found -- ✅ Single-client travel agent imports and instantiates successfully - -**Key Decisions:** -- Prioritized efficiency and simplicity over perceived per-user client isolation -- Maintained namespace-based user isolation through explicit parameters -- Leveraged HTTP client connection pooling for better resource utilization - ---- - -*This file serves as your working memory for this task. 
Keep it updated as you progress through the implementation.* diff --git a/examples/README.md b/examples/README.md index 8def895..f29baf9 100644 --- a/examples/README.md +++ b/examples/README.md @@ -17,7 +17,7 @@ A comprehensive travel assistant that demonstrates: The travel agent automatically discovers and uses all memory tools available from the client: 1. **search_memory** - Search through previous conversations and stored information -2. **get_working_memory** - Check current session state, stored memories, and data +2. **get_working_memory** - Check current session state, stored memories, and data 3. **add_memory_to_working_memory** - Store important information as structured memories 4. **update_working_memory_data** - Store/update session-specific data like trip plans @@ -39,7 +39,7 @@ python travel_agent.py --memory-server-url http://localhost:8001 ### Environment Variables - `OPENAI_API_KEY` - Required for OpenAI ChatGPT -- `TAVILY_API_KEY` - Optional for web search functionality +- `TAVILY_API_KEY` - Optional for web search functionality - `MEMORY_SERVER_URL` - Memory server URL (default: http://localhost:8000) - `REDIS_URL` - Redis URL for caching (default: redis://localhost:6379) diff --git a/examples/travel_agent.py b/examples/travel_agent.py index 1b418b6..1aeaebc 100644 --- a/examples/travel_agent.py +++ b/examples/travel_agent.py @@ -60,18 +60,18 @@ You are a helpful travel assistant. You can help with travel-related questions. You have access to conversation history and memory management tools to provide personalized responses. - + Available tools: - - 1. **web_search** (if available): Search for current travel information, weather, + + 1. **web_search** (if available): Search for current travel information, weather, events, or other up-to-date data when specifically needed. - + 2. **Memory Management Tools** (always available): - **search_memory**: Look up previous conversations and stored information - **get_working_memory**: Check current session context - **add_memory_to_working_memory**: Store important preferences or information - **update_working_memory_data**: Save session-specific data - + **Guidelines**: - Answer the user's actual question first and directly - When someone shares information (like "I like X"), simply acknowledge it naturally - don't immediately give advice or suggestions unless they ask @@ -82,7 +82,7 @@ - Only offer suggestions, recommendations, or tips if the user explicitly asks for them - Store preferences and important details, but don't be overly eager about it - If someone shares a preference, respond like a friend would - acknowledge it, maybe ask a follow-up question, but don't launch into advice - + Be helpful, friendly, and responsive. Mirror their conversational style - if they're just chatting, chat back. If they ask for help, then help. 
"""), } From 36f7954a6def863c4eb0c340712693d9f20f7322 Mon Sep 17 00:00:00 2001 From: Andrew Brookins Date: Tue, 17 Jun 2025 16:25:20 -0700 Subject: [PATCH 11/12] fix fix fix --- examples/README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/README.md b/examples/README.md index f29baf9..c733523 100644 --- a/examples/README.md +++ b/examples/README.md @@ -48,4 +48,3 @@ python travel_agent.py --memory-server-url http://localhost:8001 - **Provider Agnostic**: Tool resolution works with OpenAI, Anthropic, and other LLM providers - **Error Handling**: Robust error handling for tool calls and network issues - **Logging**: Comprehensive logging shows which tools are available and being used - From 01dedbbceca7484347768bdbccd9835bad443c06 Mon Sep 17 00:00:00 2001 From: Andrew Brookins Date: Tue, 17 Jun 2025 16:35:05 -0700 Subject: [PATCH 12/12] install the client --- .github/workflows/python-tests.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/python-tests.yml b/.github/workflows/python-tests.yml index 69d13d7..65d12c0 100644 --- a/.github/workflows/python-tests.yml +++ b/.github/workflows/python-tests.yml @@ -60,6 +60,10 @@ jobs: pip install uv uv sync --all-extras + - name: Install agent-memory-client + run: | + uv pip install -e ./agent-memory-client + - name: Run tests run: | uv run pytest --run-api-tests