4 changes: 1 addition & 3 deletions .github/workflows/integration-vector-io-tests.yml
@@ -169,9 +169,7 @@ jobs:
         run: |
           uv run --no-sync \
             pytest -sv --stack-config="files=inline::localfs,inference=inline::sentence-transformers,vector_io=${{ matrix.vector-io-provider }}" \
-            tests/integration/vector_io \
-            --embedding-model inline::sentence-transformers/nomic-ai/nomic-embed-text-v1.5 \
-            --embedding-dimension 768
+            tests/integration/vector_io

       - name: Check Storage and Memory Available After Tests
         if: ${{ always() }}
28 changes: 15 additions & 13 deletions docs/docs/building_applications/rag.mdx
@@ -88,18 +88,19 @@ Llama Stack provides OpenAI-compatible RAG capabilities through:
 To enable automatic vector store creation without specifying embedding models, configure a default embedding model in your run.yaml like so:

 ```yaml
-models:
-  - model_id: nomic-ai/nomic-embed-text-v1.5
-    provider_id: inline::sentence-transformers
-    metadata:
-      embedding_dimension: 768
-      default_configured: true
+vector_stores:
+  default_provider_id: faiss
+  default_embedding_model:
+    provider_id: sentence-transformers
+    model_id: nomic-ai/nomic-embed-text-v1.5
 ```

 With this configuration:
-- `client.vector_stores.create()` works without requiring embedding model parameters
-- The system automatically uses the default model and its embedding dimension for any newly created vector store
-- Only one model can be marked as `default_configured: true`
+- `client.vector_stores.create()` works without requiring embedding model or provider parameters
+- The system automatically uses the default vector store provider (`faiss`) when multiple providers are available
+- The system automatically uses the default embedding model (`sentence-transformers/nomic-ai/nomic-embed-text-v1.5`) for any newly created vector store
+- The `default_provider_id` specifies which vector storage backend to use
+- The `default_embedding_model` specifies both the inference provider and model for embeddings

 ## Vector Store Operations

@@ -108,14 +109,15 @@ With this configuration:
 You can create vector stores with automatic or explicit embedding model selection:

 ```python
-# Automatic - uses default configured embedding model
+# Automatic - uses default configured embedding model and vector store provider
 vs = client.vector_stores.create()

-# Explicit - specify embedding model when you need a specific one
+# Explicit - specify embedding model and/or provider when you need specific ones
 vs = client.vector_stores.create(
     extra_body={
-        "embedding_model": "nomic-ai/nomic-embed-text-v1.5",
-        "embedding_dimension": 768
+        "provider_id": "faiss",  # Optional: specify vector store provider
+        "embedding_model": "sentence-transformers/nomic-ai/nomic-embed-text-v1.5",
+        "embedding_dimension": 768  # Optional: will be auto-detected if not provided
     }
 )
 ```
1 change: 1 addition & 0 deletions llama_stack/apis/datatypes.py
@@ -121,6 +121,7 @@ class Api(Enum, metaclass=DynamicApiMeta):

     models = "models"
     shields = "shields"
+    vector_dbs = "vector_dbs"  # only used for routing
     datasets = "datasets"
     scoring_functions = "scoring_functions"
     benchmarks = "benchmarks"
34 changes: 33 additions & 1 deletion llama_stack/apis/vector_dbs/vector_dbs.py
@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from typing import Literal
+from typing import Literal, Protocol, runtime_checkable

 from pydantic import BaseModel

@@ -59,3 +59,35 @@ class ListVectorDBsResponse(BaseModel):
     """

     data: list[VectorDB]
+
+
+@runtime_checkable
+class VectorDBs(Protocol):
+    """Internal protocol for vector_dbs routing - no public API endpoints."""
+
+    async def list_vector_dbs(self) -> ListVectorDBsResponse:
+        """Internal method to list vector databases."""
+        ...
+
+    async def get_vector_db(
+        self,
+        vector_db_id: str,
+    ) -> VectorDB:
+        """Internal method to get a vector database by ID."""
+        ...
+
+    async def register_vector_db(
+        self,
+        vector_db_id: str,
+        embedding_model: str,
+        embedding_dimension: int | None = 384,
+        provider_id: str | None = None,
+        vector_db_name: str | None = None,
+        provider_vector_db_id: str | None = None,
+    ) -> VectorDB:
+        """Internal method to register a vector database."""
+        ...
+
+    async def unregister_vector_db(self, vector_db_id: str) -> None:
+        """Internal method to unregister a vector database."""
+        ...
25 changes: 25 additions & 0 deletions llama_stack/core/datatypes.py
@@ -354,6 +354,26 @@ class AuthenticationRequiredError(Exception):
     pass


+class QualifiedModel(BaseModel):
+    """A qualified model identifier, consisting of a provider ID and a model ID."""
+
+    provider_id: str
+    model_id: str
+
+
+class VectorStoresConfig(BaseModel):
+    """Configuration for vector stores in the stack."""
+
+    default_provider_id: str | None = Field(
+        default=None,
+        description="ID of the vector_io provider to use as default when multiple providers are available and none is specified.",
+    )
+    default_embedding_model: QualifiedModel | None = Field(
+        default=None,
+        description="Default embedding model configuration for vector stores.",
+    )
+
+
 class QuotaPeriod(StrEnum):
     DAY = "day"

@@ -499,6 +519,11 @@ class StackRunConfig(BaseModel):
         description="Path to directory containing external API implementations. The APIs code and dependencies must be installed on the system.",
     )

+    vector_stores: VectorStoresConfig | None = Field(
+        default=None,
+        description="Configuration for vector stores, including default embedding model",
+    )
+
     @field_validator("external_providers_dir")
     @classmethod
     def validate_external_providers_dir(cls, v):
4 changes: 4 additions & 0 deletions llama_stack/core/distribution.py
@@ -63,6 +63,10 @@ def builtin_automatically_routed_apis() -> list[AutoRoutedApiInfo]:
             routing_table_api=Api.tool_groups,
             router_api=Api.tool_runtime,
         ),
+        AutoRoutedApiInfo(
+            routing_table_api=Api.vector_dbs,
+            router_api=Api.vector_io,
+        ),
     ]
2 changes: 2 additions & 0 deletions llama_stack/core/resolver.py
@@ -29,6 +29,7 @@
 from llama_stack.apis.shields import Shields
 from llama_stack.apis.telemetry import Telemetry
 from llama_stack.apis.tools import ToolGroups, ToolRuntime
+from llama_stack.apis.vector_dbs import VectorDBs
 from llama_stack.apis.vector_io import VectorIO
 from llama_stack.apis.version import LLAMA_STACK_API_V1ALPHA
 from llama_stack.core.client import get_client_impl
@@ -81,6 +82,7 @@ def api_protocol_map(external_apis: dict[Api, ExternalApiSpec] | None = None) ->
         Api.inspect: Inspect,
         Api.batches: Batches,
         Api.vector_io: VectorIO,
+        Api.vector_dbs: VectorDBs,
         Api.models: Models,
         Api.safety: Safety,
         Api.shields: Shields,
5 changes: 5 additions & 0 deletions llama_stack/core/routers/__init__.py
@@ -29,6 +29,7 @@ async def get_routing_table_impl(
     from ..routing_tables.scoring_functions import ScoringFunctionsRoutingTable
     from ..routing_tables.shields import ShieldsRoutingTable
     from ..routing_tables.toolgroups import ToolGroupsRoutingTable
+    from ..routing_tables.vector_dbs import VectorDBsRoutingTable

     api_to_tables = {
         "models": ModelsRoutingTable,
@@ -37,6 +38,7 @@ async def get_routing_table_impl(
         "scoring_functions": ScoringFunctionsRoutingTable,
         "benchmarks": BenchmarksRoutingTable,
         "tool_groups": ToolGroupsRoutingTable,
+        "vector_dbs": VectorDBsRoutingTable,
     }

     if api.value not in api_to_tables:
@@ -91,6 +93,9 @@ async def get_auto_router_impl(
         await inference_store.initialize()
         api_to_dep_impl["store"] = inference_store

+    elif api == Api.vector_io:
+        api_to_dep_impl["vector_stores_config"] = run_config.vector_stores
+
     impl = api_to_routers[api.value](routing_table, **api_to_dep_impl)
     await impl.initialize()
     return impl
40 changes: 33 additions & 7 deletions llama_stack/core/routers/vector_io.py
@@ -31,6 +31,7 @@
     VectorStoreObject,
     VectorStoreSearchResponsePage,
 )
+from llama_stack.core.datatypes import VectorStoresConfig
 from llama_stack.log import get_logger
 from llama_stack.providers.datatypes import HealthResponse, HealthStatus, RoutingTable

@@ -43,9 +44,11 @@ class VectorIORouter(VectorIO):
     def __init__(
         self,
         routing_table: RoutingTable,
+        vector_stores_config: VectorStoresConfig | None = None,
     ) -> None:
         logger.debug("Initializing VectorIORouter")
         self.routing_table = routing_table
+        self.vector_stores_config = vector_stores_config

     async def initialize(self) -> None:
         logger.debug("VectorIORouter.initialize")
@@ -122,6 +125,17 @@ async def openai_create_vector_store(
         embedding_dimension = extra.get("embedding_dimension")
         provider_id = extra.get("provider_id")

+        # Use default embedding model if not specified
+        if (
+            embedding_model is None
+            and self.vector_stores_config
+            and self.vector_stores_config.default_embedding_model is not None
+        ):
+            # Construct the full model ID with provider prefix
+            embedding_provider_id = self.vector_stores_config.default_embedding_model.provider_id
+            model_id = self.vector_stores_config.default_embedding_model.model_id
+            embedding_model = f"{embedding_provider_id}/{model_id}"
+
         if embedding_model is not None and embedding_dimension is None:
             embedding_dimension = await self._get_embedding_model_dimension(embedding_model)

@@ -132,11 +146,24 @@ async def openai_create_vector_store(
             raise ValueError("No vector_io providers available")
         if num_providers > 1:
             available_providers = list(self.routing_table.impls_by_provider_id.keys())
-            raise ValueError(
-                f"Multiple vector_io providers available. Please specify provider_id in extra_body. "
-                f"Available providers: {available_providers}"
-            )
-        provider_id = list(self.routing_table.impls_by_provider_id.keys())[0]
+            # Use default configured provider
+            if self.vector_stores_config and self.vector_stores_config.default_provider_id:
+                default_provider = self.vector_stores_config.default_provider_id
+                if default_provider in available_providers:
+                    provider_id = default_provider
+                    logger.debug(f"Using configured default vector store provider: {provider_id}")
+                else:
+                    raise ValueError(
+                        f"Configured default vector store provider '{default_provider}' not found. "
+                        f"Available providers: {available_providers}"
+                    )
+            else:
+                raise ValueError(
+                    f"Multiple vector_io providers available. Please specify provider_id in extra_body. "
+                    f"Available providers: {available_providers}"
+                )
+        else:
+            provider_id = list(self.routing_table.impls_by_provider_id.keys())[0]

         vector_db_id = f"vs_{uuid.uuid4()}"
         registered_vector_db = await self.routing_table.register_vector_db(
@@ -243,8 +270,7 @@ async def openai_delete_vector_store(
         vector_store_id: str,
     ) -> VectorStoreDeleteResponse:
         logger.debug(f"VectorIORouter.openai_delete_vector_store: {vector_store_id}")
-        provider = await self.routing_table.get_provider_impl(vector_store_id)
-        return await provider.openai_delete_vector_store(vector_store_id)
+        return await self.routing_table.openai_delete_vector_store(vector_store_id)

     async def openai_search_vector_store(
         self,
3 changes: 3 additions & 0 deletions llama_stack/core/routing_tables/common.py
@@ -134,12 +134,15 @@ async def get_provider_impl(self, routing_key: str, provider_id: str | None = None
         from .scoring_functions import ScoringFunctionsRoutingTable
         from .shields import ShieldsRoutingTable
         from .toolgroups import ToolGroupsRoutingTable
+        from .vector_dbs import VectorDBsRoutingTable

         def apiname_object():
             if isinstance(self, ModelsRoutingTable):
                 return ("Inference", "model")
             elif isinstance(self, ShieldsRoutingTable):
                 return ("Safety", "shield")
+            elif isinstance(self, VectorDBsRoutingTable):
+                return ("VectorIO", "vector_db")
             elif isinstance(self, DatasetsRoutingTable):
                 return ("DatasetIO", "dataset")
             elif isinstance(self, ScoringFunctionsRoutingTable):