Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion dspy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from dspy.evaluate import Evaluate # isort: skip
from dspy.clients import * # isort: skip
from dspy.adapters import Adapter, ChatAdapter, JSONAdapter, XMLAdapter, TwoStepAdapter, Image, Audio, History, Type, Tool, ToolCalls, Code # isort: skip
from dspy.adapters import Adapter, ChatAdapter, JSONAdapter, XMLAdapter, TwoStepAdapter, Image, Audio, History, Type, Tool, ToolCalls, Code, Citations, Document # isort: skip
from dspy.utils.logging_utils import configure_dspy_loggers, disable_logging, enable_logging
from dspy.utils.asyncify import asyncify
from dspy.utils.syncify import syncify
Expand Down
4 changes: 3 additions & 1 deletion dspy/adapters/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from dspy.adapters.chat_adapter import ChatAdapter
from dspy.adapters.json_adapter import JSONAdapter
from dspy.adapters.two_step_adapter import TwoStepAdapter
from dspy.adapters.types import Audio, Code, History, Image, Tool, ToolCalls, Type
from dspy.adapters.types import Audio, Citations, Code, Document, History, Image, Tool, ToolCalls, Type
from dspy.adapters.xml_adapter import XMLAdapter

__all__ = [
Expand All @@ -13,6 +13,8 @@
"Image",
"Audio",
"Code",
"Citations",
"Document",
"JSONAdapter",
"XMLAdapter",
"TwoStepAdapter",
Expand Down
20 changes: 19 additions & 1 deletion dspy/adapters/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import json_repair
import litellm

from dspy.adapters.types import History
from dspy.adapters.types import Citations, History
from dspy.adapters.types.base_type import split_message_content_for_custom_types
from dspy.adapters.types.tool import Tool, ToolCalls
from dspy.signatures.signature import Signature
Expand Down Expand Up @@ -63,6 +63,10 @@ def _call_preprocess(

return signature_for_native_function_calling

citation_output_field_name = self._get_citation_output_field_name(signature)
if citation_output_field_name:
signature = signature.delete(citation_output_field_name)

return signature

def _call_postprocess(
Expand All @@ -74,16 +78,19 @@ def _call_postprocess(
values = []

tool_call_output_field_name = self._get_tool_call_output_field_name(original_signature)
citation_output_field_name = self._get_citation_output_field_name(original_signature)

for output in outputs:
output_logprobs = None
tool_calls = None
citations = None
text = output

if isinstance(output, dict):
text = output["text"]
output_logprobs = output.get("logprobs")
tool_calls = output.get("tool_calls")
citations = output.get("citations")

if text:
value = self.parse(processed_signature, text)
Expand All @@ -106,6 +113,10 @@ def _call_postprocess(
]
value[tool_call_output_field_name] = ToolCalls.from_dict_list(tool_calls)

if citations and citation_output_field_name:
citations_obj = Citations.from_dict_list(citations)
value[citation_output_field_name] = citations_obj

if output_logprobs:
value["logprobs"] = output_logprobs

Expand Down Expand Up @@ -390,6 +401,13 @@ def _get_tool_call_output_field_name(self, signature: type[Signature]) -> bool:
return name
return None

def _get_citation_output_field_name(self, signature: type[Signature]) -> str | None:
    """Return the name of the signature's `Citations` output field.

    Output fields are scanned in declaration order and the first one whose
    annotation equals `Citations` wins.

    Args:
        signature: The signature class whose `output_fields` are inspected.

    Returns:
        The matching field name, or None when no output field is annotated
        with `Citations`.
    """
    matching_names = (
        field_name
        for field_name, field_info in signature.output_fields.items()
        if field_info.annotation == Citations
    )
    return next(matching_names, None)

def format_conversation_history(
self,
signature: type[Signature],
Expand Down
4 changes: 3 additions & 1 deletion dspy/adapters/types/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
from dspy.adapters.types.audio import Audio
from dspy.adapters.types.base_type import Type
from dspy.adapters.types.citation import Citations
from dspy.adapters.types.code import Code
from dspy.adapters.types.document import Document
from dspy.adapters.types.history import History
from dspy.adapters.types.image import Image
from dspy.adapters.types.tool import Tool, ToolCalls

__all__ = ["History", "Image", "Audio", "Type", "Tool", "ToolCalls", "Code"]
__all__ = ["History", "Image", "Audio", "Type", "Tool", "ToolCalls", "Code", "Citations", "Document"]
165 changes: 165 additions & 0 deletions dspy/adapters/types/citation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
from typing import Any

import pydantic

from dspy.adapters.types.base_type import Type


class Citations(Type):
    """Citations extracted from an LM response with source references.

    This type represents citations returned by language models that support
    citation extraction, particularly Anthropic's Citations API through LiteLLM.
    Citations include the quoted text and source information.

    An instance wraps a list of `Citations.Citation` objects and can be
    iterated, indexed, and measured with `len()` directly.

    Example:
        ```python
        import dspy
        from dspy.signatures import Signature

        class AnswerWithSources(Signature):
            '''Answer questions using provided documents with citations.'''
            documents: list[dspy.Document] = dspy.InputField()
            question: str = dspy.InputField()
            answer: str = dspy.OutputField()
            citations: dspy.Citations = dspy.OutputField()

        # Create documents to provide as sources
        docs = [
            dspy.Document(
                data="The Earth orbits the Sun in an elliptical path.",
                title="Basic Astronomy Facts"
            ),
            dspy.Document(
                data="Water boils at 100°C at standard atmospheric pressure.",
                title="Physics Fundamentals",
            )
        ]

        # Use with a model that supports citations like Claude
        lm = dspy.LM("anthropic/claude-opus-4-1-20250805")
        predictor = dspy.Predict(AnswerWithSources)
        result = predictor(documents=docs, question="What temperature does water boil?", lm=lm)

        for citation in result.citations.citations:
            print(citation.format())
        ```
    """

    class Citation(Type):
        """Individual citation with character location information."""

        type: str = "char_location"
        cited_text: str
        document_index: int
        document_title: str | None = None
        start_char_index: int
        end_char_index: int
        supported_text: str | None = None

        def format(self) -> dict[str, Any]:
            """Format citation as dictionary for LM consumption.

            Returns:
                A dictionary in the format expected by citation APIs. The
                optional `document_title` and `supported_text` keys are only
                present when the corresponding attribute is non-empty.
            """
            citation_dict = {
                "type": self.type,
                "cited_text": self.cited_text,
                "document_index": self.document_index,
                "start_char_index": self.start_char_index,
                "end_char_index": self.end_char_index,
            }

            if self.document_title:
                citation_dict["document_title"] = self.document_title

            if self.supported_text:
                citation_dict["supported_text"] = self.supported_text

            return citation_dict

    citations: list[Citation]

    @classmethod
    def from_dict_list(cls, citations_dicts: list[dict[str, Any]]) -> "Citations":
        """Convert a list of dictionaries to a Citations instance.

        Args:
            citations_dicts: A list of dictionaries, where each dictionary should have 'cited_text' key
                and 'document_index', 'start_char_index', 'end_char_index' keys.

        Returns:
            A Citations instance.

        Example:
            ```python
            citations_dict = [
                {
                    "cited_text": "The sky is blue",
                    "document_index": 0,
                    "document_title": "Weather Guide",
                    "start_char_index": 0,
                    "end_char_index": 15,
                    "supported_text": "The sky was blue yesterday."
                }
            ]
            citations = Citations.from_dict_list(citations_dict)
            ```
        """
        citations = [cls.Citation(**item) for item in citations_dicts]
        return cls(citations=citations)

    @classmethod
    def description(cls) -> str:
        """Description of the citations type for use in prompts."""
        return (
            "Citations with quoted text and source references. "
            "Include the exact text being cited and information about its source."
        )

    def format(self) -> list[dict[str, Any]]:
        """Format citations as a list of dictionaries."""
        return [citation.format() for citation in self.citations]

    @pydantic.model_validator(mode="before")
    @classmethod
    def validate_input(cls, data: Any):
        """Normalize the accepted input shapes into ``{"citations": [...]}``.

        Accepted inputs:
            * an existing `Citations` instance (returned unchanged);
            * a list whose items are citation dicts (each containing
              ``"cited_text"``) and/or `Citation` instances;
            * a dict with a ``"citations"`` key holding such a list;
            * a single citation dict containing ``"cited_text"``.

        Raises:
            ValueError: If `data` matches none of the shapes above.
        """
        if isinstance(data, cls):
            return data

        # Bare list of citations. Already-built Citation objects are accepted
        # here as well, so this branch agrees with the {"citations": [...]}
        # branch below.
        if isinstance(data, list) and all(
            isinstance(item, cls.Citation) or (isinstance(item, dict) and "cited_text" in item)
            for item in data
        ):
            return {
                "citations": [
                    cls.Citation(**item) if isinstance(item, dict) else item
                    for item in data
                ]
            }

        if isinstance(data, dict):
            if "citations" in data:
                citations_data = data["citations"]
                # A non-list "citations" value falls through to the error below.
                if isinstance(citations_data, list):
                    return {
                        "citations": [
                            cls.Citation(**item) if isinstance(item, dict) else item
                            for item in citations_data
                        ]
                    }
            elif "cited_text" in data:
                # A single citation dict is wrapped into a one-element list.
                return {"citations": [cls.Citation(**data)]}

        raise ValueError(f"Received invalid value for `dspy.Citations`: {data}")

    def __iter__(self):
        """Allow iteration over citations."""
        return iter(self.citations)

    def __len__(self):
        """Return the number of citations."""
        return len(self.citations)

    def __getitem__(self, index):
        """Allow indexing into citations."""
        return self.citations[index]
111 changes: 111 additions & 0 deletions dspy/adapters/types/document.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
from typing import Any, Literal

import pydantic

from dspy.adapters.types.base_type import Type


class Document(Type):
    """A document type for providing content that can be cited by language models.

    This type represents documents that can be passed to language models for citation-enabled
    responses, particularly useful with Anthropic's Citations API. Documents include the content
    and metadata that helps the LM understand and reference the source material.

    Attributes:
        data: The content of the document. For "text/plain" this is the text itself.
            NOTE(review): for "application/pdf" the API presumably expects base64-encoded
            PDF bytes; this class does not validate or encode the value.
        title: Optional title for the document (used in citations)
        media_type: MIME type of the document content (defaults to "text/plain")
        context: Optional context information about the document

    Example:
        ```python
        import dspy
        from dspy.signatures import Signature

        class AnswerWithSources(Signature):
            '''Answer questions using provided documents with citations.'''
            documents: list[dspy.Document] = dspy.InputField()
            question: str = dspy.InputField()
            answer: str = dspy.OutputField()
            citations: dspy.Citations = dspy.OutputField()

        # Create documents
        docs = [
            dspy.Document(
                data="The Earth orbits the Sun in an elliptical path.",
                title="Basic Astronomy Facts"
            ),
            dspy.Document(
                data="Water boils at 100°C at standard atmospheric pressure.",
                title="Physics Fundamentals",
            )
        ]

        # Use with a citation-supporting model
        lm = dspy.LM("anthropic/claude-opus-4-1-20250805")
        predictor = dspy.Predict(AnswerWithSources)
        result = predictor(documents=docs, question="What temperature does water boil?", lm=lm)
        print(result.citations)
        ```
    """

    data: str
    title: str | None = None
    media_type: Literal["text/plain", "application/pdf"] = "text/plain"
    context: str | None = None

    def format(self) -> list[dict[str, Any]]:
        """Format document for LM consumption.

        Returns:
            A list containing the document block in the format expected by citation-enabled language models.
            `title` and `context` are only included when they are non-empty.
        """
        # Anthropic document blocks pair a "text" source with "text/plain" and a
        # "base64" source with "application/pdf"; a "text" source carrying a PDF
        # media type is not a valid combination.
        source_type = "base64" if self.media_type == "application/pdf" else "text"

        document_block = {
            "type": "document",
            "source": {
                "type": source_type,
                "media_type": self.media_type,
                "data": self.data,
            },
            "citations": {"enabled": True},
        }

        if self.title:
            document_block["title"] = self.title

        if self.context:
            document_block["context"] = self.context

        return [document_block]

    @classmethod
    def description(cls) -> str:
        """Description of the document type for use in prompts."""
        return (
            "A document containing text content that can be referenced and cited. "
            "Include the full text content and optionally a title for proper referencing."
        )

    @pydantic.model_validator(mode="before")
    @classmethod
    def validate_input(cls, data: Any):
        """Normalize raw input before field validation.

        A `Document` instance or a dict is passed through unchanged; a plain
        string is treated as the document's `data`.

        Raises:
            ValueError: If `data` is not a `Document`, a string, or a dict.
        """
        if isinstance(data, cls):
            return data

        # Handle case where data is just a string (data only)
        if isinstance(data, str):
            return {"data": data}

        # Handle case where data is a dict
        elif isinstance(data, dict):
            return data

        raise ValueError(f"Received invalid value for `dspy.Document`: {data}")

    def __str__(self) -> str:
        """String representation showing title and content length."""
        title_part = f"'{self.title}': " if self.title else ""
        return f"Document({title_part}{len(self.data)} chars)"
Loading