Add dual context percentage fields to working memory endpoints #38

Merged · 6 commits · Jul 26, 2025

Changes from all commits

9 changes: 8 additions & 1 deletion agent-memory-client/agent_memory_client/models.py
@@ -215,7 +215,14 @@ class SessionListResponse(BaseModel):
 class WorkingMemoryResponse(WorkingMemory):
     """Response from working memory operations"""
 
-    pass
+    context_percentage_total_used: float | None = Field(
+        default=None,
+        description="Percentage of total context window currently used (0-100)",
+    )
+    context_percentage_until_summarization: float | None = Field(
+        default=None,
+        description="Percentage until auto-summarization triggers (0-100, reaches 100% at summarization threshold)",
+    )
 
 
 class MemoryRecordResult(MemoryRecord):
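
For illustration only, a minimal sketch of how calling code might consume the two new fields once a response is parsed; the helper name is hypothetical, and only `WorkingMemoryResponse` and its fields come from this diff:

```python
from agent_memory_client.models import WorkingMemoryResponse

def describe_context_usage(response: WorkingMemoryResponse) -> str:
    # Both fields default to None when the server had no model_name or
    # context_window_max to size the window, so guard before formatting.
    total = response.context_percentage_total_used
    until = response.context_percentage_until_summarization
    if total is None or until is None:
        return "context usage unknown (no model info provided)"
    return f"{total:.1f}% of window used; {until:.1f}% of the way to summarization"
```
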
127 changes: 127 additions & 0 deletions agent-memory-client/tests/test_client.py
@@ -653,3 +653,130 @@ def test_validation_with_none_values(self, enhanced_test_client):
 
         # Should not raise
         enhanced_test_client.validate_memory_record(memory)
+
+
+class TestContextUsagePercentage:
+    """Tests for context usage percentage functionality."""
+
+    @pytest.mark.asyncio
+    async def test_working_memory_response_with_context_percentages(
+        self, enhanced_test_client
+    ):
+        """Test that WorkingMemoryResponse properly handles both context percentage fields."""
+        session_id = "test-session"
+
+        # Test with both context percentages set
+        working_memory_response = WorkingMemoryResponse(
+            session_id=session_id,
+            messages=[],
+            memories=[],
+            data={},
+            context=None,
+            user_id=None,
+            context_percentage_total_used=45.5,
+            context_percentage_until_summarization=65.0,
+        )
+
+        assert working_memory_response.context_percentage_total_used == 45.5
+        assert working_memory_response.context_percentage_until_summarization == 65.0
+        assert working_memory_response.session_id == session_id
+
+        # Test with None context percentages (default)
+        working_memory_response_none = WorkingMemoryResponse(
+            session_id=session_id,
+            messages=[],
+            memories=[],
+            data={},
+            context=None,
+            user_id=None,
+        )
+
+        assert working_memory_response_none.context_percentage_total_used is None
+        assert (
+            working_memory_response_none.context_percentage_until_summarization is None
+        )
+
+    @pytest.mark.asyncio
+    async def test_context_percentages_serialization(self, enhanced_test_client):
+        """Test that both context percentage fields are properly serialized."""
+        session_id = "test-session"
+
+        # Create response with both context percentages
+        working_memory_response = WorkingMemoryResponse(
+            session_id=session_id,
+            messages=[],
+            memories=[],
+            data={},
+            context=None,
+            user_id=None,
+            context_percentage_total_used=75.0,
+            context_percentage_until_summarization=85.5,
+        )
+
+        # Test model_dump includes both fields
+        dumped = working_memory_response.model_dump()
+        assert "context_percentage_total_used" in dumped
+        assert "context_percentage_until_summarization" in dumped
+        assert dumped["context_percentage_total_used"] == 75.0
+        assert dumped["context_percentage_until_summarization"] == 85.5
+
+        # Test JSON serialization
+        json_data = working_memory_response.model_dump_json()
+        assert "context_percentage_total_used" in json_data
+        assert "context_percentage_until_summarization" in json_data
+        assert "75.0" in json_data
+        assert "85.5" in json_data
+
+    @pytest.mark.asyncio
+    async def test_context_percentages_validation(self, enhanced_test_client):
+        """Test that both context percentage fields accept valid values."""
+        session_id = "test-session"
+
+        # Test valid percentages
+        valid_percentages = [0.0, 25.5, 50.0, 99.9, 100.0, None]
+
+        for percentage in valid_percentages:
+            working_memory_response = WorkingMemoryResponse(
+                session_id=session_id,
+                messages=[],
+                memories=[],
+                data={},
+                context=None,
+                user_id=None,
+                context_percentage_total_used=percentage,
+                context_percentage_until_summarization=percentage,
+            )
+            assert working_memory_response.context_percentage_total_used == percentage
+            assert (
+                working_memory_response.context_percentage_until_summarization
+                == percentage
+            )
+
+    def test_working_memory_response_from_dict_with_context_percentages(self):
+        """Test that WorkingMemoryResponse can be created from dict with both context percentage fields."""
+        session_id = "test-session"
+
+        # Test creating WorkingMemoryResponse from dict (simulating API response parsing)
+        response_dict = {
+            "session_id": session_id,
+            "messages": [],
+            "memories": [],
+            "data": {},
+            "context": None,
+            "user_id": None,
+            "context_percentage_total_used": 33.3,
+            "context_percentage_until_summarization": 47.5,
+            "tokens": 0,
+            "namespace": None,
+            "ttl_seconds": None,
+            "last_accessed": "2024-01-01T00:00:00Z",
+        }
+
+        # This simulates what happens when the API client parses the JSON response
+        result = WorkingMemoryResponse(**response_dict)
+
+        # Verify both context percentage fields are included
+        assert isinstance(result, WorkingMemoryResponse)
+        assert result.context_percentage_total_used == 33.3
+        assert result.context_percentage_until_summarization == 47.5
+        assert result.session_id == session_id
77 changes: 73 additions & 4 deletions agent_memory_server/api.py
@@ -63,6 +63,45 @@ def _calculate_messages_token_count(messages: list[MemoryMessage]) -> int:
     return total_tokens
 
 
+def _calculate_context_usage_percentages(
+    messages: list[MemoryMessage],
+    model_name: ModelNameLiteral | None,
+    context_window_max: int | None,
+) -> tuple[float | None, float | None]:
+    """
+    Calculate context usage percentages for total usage and until summarization triggers.
+
+    Args:
+        messages: List of messages to calculate token count for
+        model_name: The client's LLM model name for context window determination
+        context_window_max: Direct specification of context window max tokens
+
+    Returns:
+        Tuple of (total_percentage, until_summarization_percentage)
+        - total_percentage: Percentage (0-100) of total context window used
+        - until_summarization_percentage: Percentage (0-100) until summarization triggers
+        Both values are None if no model info provided
+    """
+    if not messages or (not model_name and not context_window_max):
+        return None, None
+
+    # Calculate current token usage
+    current_tokens = _calculate_messages_token_count(messages)
+
+    # Get effective token limit for the client's model
+    max_tokens = _get_effective_token_limit(model_name, context_window_max)
+
+    # Calculate percentage of total context window used
+    total_percentage = (current_tokens / max_tokens) * 100.0
+
+    # Calculate percentage until summarization threshold
+    token_threshold = int(max_tokens * settings.summarization_threshold)
+    until_summarization_percentage = (current_tokens / token_threshold) * 100.0
+
+    # Cap both at 100% for display purposes
+    return min(total_percentage, 100.0), min(until_summarization_percentage, 100.0)
+
+
 async def _summarize_working_memory(
     memory: WorkingMemory,
     model_name: ModelNameLiteral | None = None,
@@ -88,8 +88,8 @@ async def _summarize_working_memory(
     max_tokens = _get_effective_token_limit(model_name, context_window_max)
 
     # Reserve space for new messages, function calls, and response generation
-    # Use 70% of context window to leave room for new content
-    token_threshold = int(max_tokens * 0.7)
+    # Use configurable threshold to leave room for new content
+    token_threshold = int(max_tokens * settings.summarization_threshold)
 
     if current_tokens <= token_threshold:
         return memory
@@ -269,7 +269,22 @@ async def get_working_memory(
 
     logger.debug(f"Working mem: {working_mem}")
 
-    return working_mem
+    # Calculate context usage percentages
+    total_percentage, until_summarization_percentage = (
+        _calculate_context_usage_percentages(
+            messages=working_mem.messages,
+            model_name=model_name,
+            context_window_max=context_window_max,
+        )
+    )
+
+    # Return WorkingMemoryResponse with both percentage values
+    working_mem_data = working_mem.model_dump()
+    working_mem_data["context_percentage_total_used"] = total_percentage
+    working_mem_data["context_percentage_until_summarization"] = (
+        until_summarization_percentage
+    )
+    return WorkingMemoryResponse(**working_mem_data)
 
 
 @router.put("/v1/working-memory/{session_id}", response_model=WorkingMemoryResponse)
@@ -348,7 +402,22 @@ async def put_working_memory(
         namespace=updated_memory.namespace,
     )
 
-    return updated_memory
+    # Calculate context usage percentages based on the final state (after potential summarization)
+    total_percentage, until_summarization_percentage = (
+        _calculate_context_usage_percentages(
+            messages=updated_memory.messages,
+            model_name=model_name,
+            context_window_max=context_window_max,
+        )
+    )
+
+    # Return WorkingMemoryResponse with both percentage values
+    updated_memory_data = updated_memory.model_dump()
+    updated_memory_data["context_percentage_total_used"] = total_percentage
+    updated_memory_data["context_percentage_until_summarization"] = (
+        until_summarization_percentage
+    )
+    return WorkingMemoryResponse(**updated_memory_data)
 
 
 @router.delete("/v1/working-memory/{session_id}", response_model=AckResponse)
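
As a sanity check on the arithmetic in `_calculate_context_usage_percentages` (illustrative numbers, not taken from the PR): with 3,500 message tokens, an 8,000-token window, and the default `summarization_threshold` of 0.7, the threshold is 5,600 tokens, so total usage is 43.75% and progress toward summarization is 62.5%. A self-contained sketch of the same math:

```python
def context_percentages(
    current_tokens: int, max_tokens: int, threshold: float = 0.7
) -> tuple[float, float]:
    # Mirrors the endpoint logic: total window usage and progress toward
    # the summarization threshold, both capped at 100 for display.
    total = (current_tokens / max_tokens) * 100.0
    until = (current_tokens / int(max_tokens * threshold)) * 100.0
    return min(total, 100.0), min(until, 100.0)

print(context_percentages(3500, 8000))  # (43.75, 62.5)
```

Because the until-summarization figure divides by the smaller threshold count, it always reaches 100% before total usage does, matching the field descriptions in the models.
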
3 changes: 3 additions & 0 deletions agent_memory_server/config.py
@@ -119,6 +119,9 @@ class Settings(BaseSettings):
 
     # Working memory settings
     window_size: int = 20  # Default number of recent messages to return
+    summarization_threshold: float = (
+        0.7  # Fraction of context window that triggers summarization
+    )
 
     # Other Application settings
     log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = "INFO"
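
Since `Settings` extends pydantic's `BaseSettings`, the new knob should be overridable through the environment like the existing settings. The variable name below assumes pydantic-settings' default case-insensitive field-name mapping, which this diff does not show, so treat it as unverified:

```python
import os

# Assumed env mapping; confirm against the project's Settings config.
os.environ["SUMMARIZATION_THRESHOLD"] = "0.8"

from agent_memory_server.config import Settings

print(Settings().summarization_threshold)  # 0.8 if the default mapping applies
```
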
9 changes: 9 additions & 0 deletions agent_memory_server/models.py
@@ -222,6 +222,15 @@ class WorkingMemory(BaseModel):
 class WorkingMemoryResponse(WorkingMemory):
     """Response containing working memory"""
 
+    context_percentage_total_used: float | None = Field(
+        default=None,
+        description="Percentage of total context window currently used (0-100)",
+    )
+    context_percentage_until_summarization: float | None = Field(
+        default=None,
+        description="Percentage until auto-summarization triggers (0-100, reaches 100% at summarization threshold)",
+    )
+
 
 class WorkingMemoryRequest(BaseModel):
     """Request parameters for working memory operations"""
5 changes: 1 addition & 4 deletions docs/memory-types.md
@@ -202,11 +202,8 @@ Long-term memory supports three types of memories:
 # Create long-term memories
 POST /v1/long-term-memory/
 
-# Search long-term memories only
+# Search long-term memories
 POST /v1/long-term-memory/search
-
-# Search across all memory types
-POST /v1/memory/search
 ```
 
 ### Search Capabilities
Binary file added dump.rdb (contents not shown)