LangChain

simplevecdb.integrations.langchain.SimpleVecDBVectorStore

Bases: VectorStore

LangChain-compatible wrapper for SimpleVecDB.

Source code in src/simplevecdb/integrations/langchain.py
class SimpleVecDBVectorStore(VectorStore):
    """LangChain-compatible wrapper for SimpleVecDB."""

    def __init__(
        self,
        db_path: str = ":memory:",
        embedding: Embeddings | None = None,
        collection_name: str = "default",
        **kwargs: Any,
    ):
        self.embedding = embedding  # LangChain expects this
        self._db = VectorDB(path=db_path, **kwargs)
        self._collection = self._db.collection(collection_name)

    @classmethod
    def from_texts(
        cls,
        texts: list[str],
        embedding: Embeddings,
        metadatas: list[dict] | None = None,
        db_path: str = ":memory:",
        collection_name: str = "default",
        **kwargs: Any,
    ) -> "SimpleVecDBVectorStore":
        """
        Initialize from texts (embeds them automatically).

        Args:
            texts: List of texts to add.
            embedding: LangChain Embeddings model.
            metadatas: Optional list of metadata dicts.
            db_path: Path to SQLite database.
            collection_name: Name of the collection to use.
            **kwargs: Additional arguments for VectorDB.

        Returns:
            Initialized SimpleVecDBVectorStore.
        """
        store = cls(
            embedding=embedding,
            db_path=db_path,
            collection_name=collection_name,
            **kwargs,
        )
        store.add_texts(texts, metadatas)
        return store

    def add_texts(
        self,
        texts: Iterable[str],
        metadatas: list[dict] | None = None,
        **kwargs: Any,
    ) -> list[str]:
        """
        Add texts, embedding them if no pre-computed embeddings are supplied. Returns IDs as strings.

        Args:
            texts: Iterable of texts to add.
            metadatas: Optional list of metadata dicts.
            **kwargs: Additional arguments (e.g., ids).

        Returns:
            List of document IDs.
        """
        texts_list = list(texts)
        embeddings = None
        if self.embedding:
            embeddings = self.embedding.embed_documents(texts_list)
        ids = self._collection.add_texts(
            texts=texts_list,
            metadatas=metadatas,
            embeddings=embeddings,
            ids=kwargs.get("ids"),
        )
        return [str(id_) for id_ in ids]

    def similarity_search(
        self,
        query: str,
        k: int = 4,
        **kwargs: Any,
    ) -> list[LangChainDocument]:
        """
        Search by text query (auto-embeds).

        Args:
            query: Text query string.
            k: Number of results to return.
            **kwargs: Additional arguments (e.g., filter).

        Returns:
            List of LangChain Documents.
        """
        if self.embedding:
            query_vec = self.embedding.embed_query(query)
        else:
            raise ValueError("Embedding model required for text queries")
        results = self._collection.similarity_search(
            query=query_vec,
            k=k,
            filter=kwargs.get("filter"),
        )
        return [
            LangChainDocument(page_content=doc.page_content, metadata=doc.metadata)
            for doc, _ in results
        ]

    def similarity_search_with_score(
        self,
        query: str,
        k: int = 4,
        **kwargs: Any,
    ) -> list[tuple[LangChainDocument, float]]:
        """
        Return documents with their scores (raw distances).

        Args:
            query: Text query string.
            k: Number of results to return.
            **kwargs: Additional arguments (e.g., filter).

        Returns:
            List of (Document, score) tuples.
        """
        if self.embedding:
            query_vec = self.embedding.embed_query(query)
        else:
            raise ValueError("Embedding model required")
        results = self._collection.similarity_search(
            query=query_vec,
            k=k,
            filter=kwargs.get("filter"),
        )
        return [
            (
                LangChainDocument(page_content=doc.page_content, metadata=doc.metadata),
                score,
            )
            for doc, score in results
        ]

    def delete(self, ids: list[str] | None = None, **kwargs: Any) -> None:
        """
        Delete documents by ID.

        Args:
            ids: List of document IDs to delete.
            **kwargs: Unused.
        """
        if ids:
            int_ids = [int(id_) for id_ in ids]
            self._collection.delete_by_ids(int_ids)

    def max_marginal_relevance_search(
        self,
        query: str,
        k: int = 4,
        fetch_k: int = constants.DEFAULT_FETCH_K,
        lambda_mult: float = 0.5,
        **kwargs: Any,
    ) -> list[LangChainDocument]:
        """
        Max marginal relevance search.

        Args:
            query: Text query string.
            k: Number of results to return.
            fetch_k: Number of candidates to fetch.
            lambda_mult: Diversity trade-off (unused in core currently).
            **kwargs: Additional arguments (e.g., filter).

        Returns:
            List of LangChain Documents.
        """
        if self.embedding:
            query_vec = self.embedding.embed_query(query)
        else:
            raise ValueError("Embedding model required for text queries")
        results = self._collection.max_marginal_relevance_search(
            query=query_vec,
            k=k,
            fetch_k=fetch_k,
            filter=kwargs.get("filter"),
        )
        return [
            LangChainDocument(page_content=doc.page_content, metadata=doc.metadata)
            for doc in results
        ]

    def keyword_search(
        self,
        query: str,
        k: int = 4,
        **kwargs: Any,
    ) -> list[LangChainDocument]:
        """Return BM25-ranked documents without requiring embeddings."""

        results = self._collection.keyword_search(
            query, k=k, filter=kwargs.get("filter")
        )
        return [
            LangChainDocument(page_content=doc.page_content, metadata=doc.metadata)
            for doc, _ in results
        ]

    def hybrid_search(
        self,
        query: str,
        k: int = 4,
        **kwargs: Any,
    ) -> list[LangChainDocument]:
        """Blend BM25 + vector rankings using Reciprocal Rank Fusion."""

        query_vec = None
        if self.embedding and hasattr(self.embedding, "embed_query"):
            query_vec = self.embedding.embed_query(query)

        results = self._collection.hybrid_search(
            query,
            k=k,
            filter=kwargs.get("filter"),
            query_vector=query_vec,
            vector_k=kwargs.get("vector_k"),
            keyword_k=kwargs.get("keyword_k"),
            rrf_k=kwargs.get("rrf_k", constants.DEFAULT_RRF_K),
        )
        return [
            LangChainDocument(page_content=doc.page_content, metadata=doc.metadata)
            for doc, _ in results
        ]

    # Async stubs: wrap the sync methods for now; true async is planned for v1.
    async def aadd_texts(self, *args, **kwargs):
        return self.add_texts(*args, **kwargs)

    async def asimilarity_search(self, *args, **kwargs):
        return self.similarity_search(*args, **kwargs)

    # The async MMR variant also just wraps the sync implementation.
    async def amax_marginal_relevance_search(
        self,
        *args,
        **kwargs,
    ) -> list[LangChainDocument]:
        return self.max_marginal_relevance_search(*args, **kwargs)
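
A minimal end-to-end sketch of the wrapper. ToyEmbeddings is a hypothetical stand-in for any LangChain Embeddings implementation (for example OpenAIEmbeddings); only its two-method interface matters:

from langchain_core.embeddings import Embeddings

from simplevecdb.integrations.langchain import SimpleVecDBVectorStore


class ToyEmbeddings(Embeddings):
    """Hypothetical deterministic embedder, for illustration only."""

    def embed_documents(self, texts: list[str]) -> list[list[float]]:
        return [self.embed_query(t) for t in texts]

    def embed_query(self, text: str) -> list[float]:
        # 8-dim pseudo-embedding derived from the text hash; the resulting
        # similarities are arbitrary, which is fine for exercising the API.
        return [float((hash(text) >> (4 * i)) & 0xF) for i in range(8)]


store = SimpleVecDBVectorStore.from_texts(
    ["SQLite is a file-based database.", "FAISS is an ANN library."],
    embedding=ToyEmbeddings(),
    db_path=":memory:",
)
print(store.similarity_search("embedded databases", k=1)[0].page_content)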

from_texts(texts, embedding, metadatas=None, db_path=':memory:', collection_name='default', **kwargs) classmethod

Initialize from texts (embeds them automatically).

Parameters:

    Name             Type                Description                          Default
    texts            list[str]           List of texts to add.                required
    embedding        Embeddings          LangChain Embeddings model.          required
    metadatas        list[dict] | None   Optional list of metadata dicts.     None
    db_path          str                 Path to SQLite database.             ':memory:'
    collection_name  str                 Name of the collection to use.       'default'
    **kwargs         Any                 Additional arguments for VectorDB.   {}

Returns:

    Type                    Description
    SimpleVecDBVectorStore  Initialized SimpleVecDBVectorStore.

Source code in src/simplevecdb/integrations/langchain.py
@classmethod
def from_texts(
    cls,
    texts: list[str],
    embedding: Embeddings,
    metadatas: list[dict] | None = None,
    db_path: str = ":memory:",
    collection_name: str = "default",
    **kwargs: Any,
) -> "SimpleVecDBVectorStore":
    """
    Initialize from texts (embeds them automatically).

    Args:
        texts: List of texts to add.
        embedding: LangChain Embeddings model.
        metadatas: Optional list of metadata dicts.
        db_path: Path to SQLite database.
        collection_name: Name of the collection to use.
        **kwargs: Additional arguments for VectorDB.

    Returns:
        Initialized SimpleVecDBVectorStore.
    """
    store = cls(
        embedding=embedding,
        db_path=db_path,
        collection_name=collection_name,
        **kwargs,
    )
    store.add_texts(texts, metadatas)
    return store

add_texts(texts, metadatas=None, **kwargs)

Add texts, embedding them if no pre-computed embeddings are supplied. Returns IDs as strings.

Parameters:

    Name       Type                Description                         Default
    texts      Iterable[str]       Iterable of texts to add.           required
    metadatas  list[dict] | None   Optional list of metadata dicts.    None
    **kwargs   Any                 Additional arguments (e.g., ids).   {}

Returns:

    Type       Description
    list[str]  List of document IDs.

Source code in src/simplevecdb/integrations/langchain.py
def add_texts(
    self,
    texts: Iterable[str],
    metadatas: list[dict] | None = None,
    **kwargs: Any,
) -> list[str]:
    """
    Add texts, embedding them if no pre-computed embeddings are supplied. Returns IDs as strings.

    Args:
        texts: Iterable of texts to add.
        metadatas: Optional list of metadata dicts.
        **kwargs: Additional arguments (e.g., ids).

    Returns:
        List of document IDs.
    """
    texts_list = list(texts)
    embeddings = None
    if self.embedding:
        embeddings = self.embedding.embed_documents(texts_list)
    ids = self._collection.add_texts(
        texts=texts_list,
        metadatas=metadatas,
        embeddings=embeddings,
        ids=kwargs.get("ids"),
    )
    return [str(id_) for id_ in ids]
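
A short sketch, assuming store was built with an Embeddings model as in the class-level example above. The ids kwarg is forwarded to the core collection, and the returned IDs are stringified integer row IDs:

ids = store.add_texts(
    ["Vectors live in a collection."],
    metadatas=[{"source": "notes"}],
)
print(ids)  # e.g. ["3"] -- integer row IDs returned as strings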

similarity_search(query, k=4, **kwargs)

Search by text query (auto-embeds).

Parameters:

    Name      Type  Description                            Default
    query     str   Text query string.                     required
    k         int   Number of results to return.           4
    **kwargs  Any   Additional arguments (e.g., filter).   {}

Returns:

    Type            Description
    list[Document]  List of LangChain Documents.

Source code in src/simplevecdb/integrations/langchain.py
def similarity_search(
    self,
    query: str,
    k: int = 4,
    **kwargs: Any,
) -> list[LangChainDocument]:
    """
    Search by text query (auto-embeds).

    Args:
        query: Text query string.
        k: Number of results to return.
        **kwargs: Additional arguments (e.g., filter).

    Returns:
        List of LangChain Documents.
    """
    if self.embedding:
        query_vec = self.embedding.embed_query(query)
    else:
        raise ValueError("Embedding model required for text queries")
    results = self._collection.similarity_search(
        query=query_vec,
        k=k,
        filter=kwargs.get("filter"),
    )
    return [
        LangChainDocument(page_content=doc.page_content, metadata=doc.metadata)
        for doc, _ in results
    ]
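
Example, assuming store from the earlier sketch. The filter kwarg is passed through to the core collection's metadata filtering:

hits = store.similarity_search("databases", k=2, filter={"source": "notes"})
for doc in hits:
    print(doc.metadata, doc.page_content)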

similarity_search_with_score(query, k=4, **kwargs)

Return documents with their scores (raw distances).

Parameters:

    Name      Type  Description                            Default
    query     str   Text query string.                     required
    k         int   Number of results to return.           4
    **kwargs  Any   Additional arguments (e.g., filter).   {}

Returns:

    Type                          Description
    list[tuple[Document, float]]  List of (Document, score) tuples.

Source code in src/simplevecdb/integrations/langchain.py
def similarity_search_with_score(
    self,
    query: str,
    k: int = 4,
    **kwargs: Any,
) -> list[tuple[LangChainDocument, float]]:
    """
    Return documents with their scores (raw distances).

    Args:
        query: Text query string.
        k: Number of results to return.
        **kwargs: Additional arguments (e.g., filter).

    Returns:
        List of (Document, score) tuples.
    """
    if self.embedding:
        query_vec = self.embedding.embed_query(query)
    else:
        raise ValueError("Embedding model required")
    results = self._collection.similarity_search(
        query=query_vec,
        k=k,
        filter=kwargs.get("filter"),
    )
    return [
        (
            LangChainDocument(page_content=doc.page_content, metadata=doc.metadata),
            score,
        )
        for doc, score in results
    ]
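
Example, again assuming store from the earlier sketch. Scores are the raw distances from the underlying index, so lower values mean closer matches:

for doc, score in store.similarity_search_with_score("databases", k=2):
    print(f"{score:.3f}  {doc.page_content}")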

delete(ids=None, **kwargs)

Delete documents by ID.

Parameters:

    Name      Type               Description                       Default
    ids       list[str] | None   List of document IDs to delete.   None
    **kwargs  Any                Unused.                           {}

Source code in src/simplevecdb/integrations/langchain.py
def delete(self, ids: list[str] | None = None, **kwargs: Any) -> None:
    """
    Delete documents by ID.

    Args:
        ids: List of document IDs to delete.
        **kwargs: Unused.
    """
    if ids:
        int_ids = [int(id_) for id_ in ids]
        self._collection.delete_by_ids(int_ids)
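
Since the wrapper exposes integer row IDs as strings, delete round-trips the IDs returned by add_texts (assuming store from the earlier sketch):

ids = store.add_texts(["temporary note"])
store.delete(ids=ids)  # str IDs are cast back to int internally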

max_marginal_relevance_search(query, k=4, fetch_k=DEFAULT_FETCH_K, lambda_mult=0.5, **kwargs)

Max marginal relevance search.

Parameters:

    Name         Type   Description                                       Default
    query        str    Text query string.                                required
    k            int    Number of results to return.                      4
    fetch_k      int    Number of candidates to fetch.                    DEFAULT_FETCH_K
    lambda_mult  float  Diversity trade-off (unused in core currently).   0.5
    **kwargs     Any    Additional arguments (e.g., filter).              {}

Returns:

    Type            Description
    list[Document]  List of LangChain Documents.

Source code in src/simplevecdb/integrations/langchain.py
def max_marginal_relevance_search(
    self,
    query: str,
    k: int = 4,
    fetch_k: int = constants.DEFAULT_FETCH_K,
    lambda_mult: float = 0.5,
    **kwargs: Any,
) -> list[LangChainDocument]:
    """
    Max marginal relevance search.

    Args:
        query: Text query string.
        k: Number of results to return.
        fetch_k: Number of candidates to fetch.
        lambda_mult: Diversity trade-off (unused in core currently).
        **kwargs: Additional arguments (e.g., filter).

    Returns:
        List of LangChain Documents.
    """
    if self.embedding:
        query_vec = self.embedding.embed_query(query)
    else:
        raise ValueError("Embedding model required for text queries")
    results = self._collection.max_marginal_relevance_search(
        query=query_vec,
        k=k,
        fetch_k=fetch_k,
        filter=kwargs.get("filter"),
    )
    return [
        LangChainDocument(page_content=doc.page_content, metadata=doc.metadata)
        for doc in results
    ]
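
Example, assuming store from the earlier sketch: fetch_k candidates are retrieved and then reduced to k results by the core MMR routine; lambda_mult is accepted for interface compatibility but not forwarded yet:

docs = store.max_marginal_relevance_search("databases", k=2, fetch_k=10)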

keyword_search(query, k=4, **kwargs)

Return BM25-ranked documents without requiring embeddings.

Source code in src/simplevecdb/integrations/langchain.py
def keyword_search(
    self,
    query: str,
    k: int = 4,
    **kwargs: Any,
) -> list[LangChainDocument]:
    """Return BM25-ranked documents without requiring embeddings."""

    results = self._collection.keyword_search(
        query, k=k, filter=kwargs.get("filter")
    )
    return [
        LangChainDocument(page_content=doc.page_content, metadata=doc.metadata)
        for doc, _ in results
    ]
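
Because BM25 runs against the SQLite FTS index, no Embeddings model is needed. A sketch, assuming the core collection accepts texts without pre-computed vectors:

store = SimpleVecDBVectorStore(db_path=":memory:")  # no embedding model
store.add_texts(["BM25 ranks by term statistics.", "Vectors rank by distance."])
print(store.keyword_search("BM25 statistics", k=1)[0].page_content)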

hybrid_search(query, k=4, **kwargs)

Blend BM25 + vector rankings using Reciprocal Rank Fusion.

Source code in src/simplevecdb/integrations/langchain.py
def hybrid_search(
    self,
    query: str,
    k: int = 4,
    **kwargs: Any,
) -> list[LangChainDocument]:
    """Blend BM25 + vector rankings using Reciprocal Rank Fusion."""

    query_vec = None
    if self.embedding and hasattr(self.embedding, "embed_query"):
        query_vec = self.embedding.embed_query(query)

    results = self._collection.hybrid_search(
        query,
        k=k,
        filter=kwargs.get("filter"),
        query_vector=query_vec,
        vector_k=kwargs.get("vector_k"),
        keyword_k=kwargs.get("keyword_k"),
        rrf_k=kwargs.get("rrf_k", constants.DEFAULT_RRF_K),
    )
    return [
        LangChainDocument(page_content=doc.page_content, metadata=doc.metadata)
        for doc, _ in results
    ]
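
A sketch assuming store carries an Embeddings model; the wrapper only computes the query vector itself when one is attached. The optional kwargs tune each leg of the fusion:

docs = store.hybrid_search(
    "ANN library",
    k=3,
    vector_k=20,   # candidate pool for the vector leg
    keyword_k=20,  # candidate pool for the BM25 leg
    # rrf_k falls back to constants.DEFAULT_RRF_K when omitted
)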

LlamaIndex

simplevecdb.integrations.llamaindex.SimpleVecDBLlamaStore

Bases: BasePydanticVectorStore

LlamaIndex-compatible wrapper for SimpleVecDB.

Source code in src/simplevecdb/integrations/llamaindex.py
class SimpleVecDBLlamaStore(BasePydanticVectorStore):
    """LlamaIndex-compatible wrapper for SimpleVecDB."""

    stores_text: bool = True
    is_embedding_query: bool = True

    def __init__(
        self,
        db_path: str = ":memory:",
        collection_name: str = "default",
        **kwargs: Any,
    ):
        # Pass stores_text as a literal value, not self.stores_text
        super().__init__(stores_text=True)
        self._db = VectorDB(path=db_path, **kwargs)
        self._collection = self._db.collection(collection_name)
        # Map internal DB IDs to node IDs
        self._id_map: dict[int, str] = {}

    @property
    def client(self) -> Any:
        """Return the underlying client (our VectorDB)."""
        return self._db

    @property
    def store_text(self) -> bool:
        """Whether the store keeps text content."""
        return self.stores_text

    def add(self, nodes: Sequence[BaseNode], **kwargs: Any) -> list[str]:
        """
        Add nodes with embeddings.

        Args:
            nodes: Sequence of LlamaIndex BaseNodes.
            **kwargs: Unused.

        Returns:
            List of node IDs.
        """
        texts = [node.get_content() for node in nodes]
        metadatas = [node.metadata for node in nodes]

        # Extract embeddings, ensuring all are valid or set to None
        embeddings = None
        if nodes and nodes[0].embedding is not None:
            # Ensure all embeddings are present (not None)
            emb_list = []
            all_have_embeddings = True
            for node in nodes:
                if node.embedding is None:
                    all_have_embeddings = False
                    break
                emb_list.append(node.embedding)

            if all_have_embeddings:
                embeddings = emb_list

        # Add to DB and get internal IDs
        internal_ids = self._collection.add_texts(texts, metadatas, embeddings)

        # Track mapping from internal ID to node ID
        node_ids = []
        for i, node in enumerate(nodes):
            internal_id = internal_ids[i]
            node_id = node.node_id or str(internal_id)
            self._id_map[internal_id] = node_id
            node_ids.append(node_id)

        return node_ids

    def delete(self, ref_doc_id: str, **delete_kwargs: Any) -> None:
        """
        Delete by ref_doc_id (node ID).

        Args:
            ref_doc_id: The node ID to delete.
            **delete_kwargs: Unused.
        """
        # Find internal ID from node ID
        internal_id = None
        for int_id, node_id in self._id_map.items():
            if node_id == ref_doc_id:
                internal_id = int_id
                break

        if internal_id is not None:
            self._collection.delete_by_ids([internal_id])
            del self._id_map[internal_id]

    def delete_nodes(
        self,
        node_ids: list[str] | None = None,
        filters: MetadataFilters | None = None,
        **delete_kwargs: Any,
    ) -> None:
        """
        Delete nodes from vector store.

        Args:
            node_ids: List of node IDs to delete.
            filters: Metadata filters (unused).
            **delete_kwargs: Unused.
        """
        if node_ids:
            for node_id in node_ids:
                self.delete(node_id)

    def _filters_to_dict(
        self, filters: MetadataFilters | None
    ) -> dict[str, Any] | None:
        if filters is None:
            return None
        result: dict[str, Any] = {}
        if hasattr(filters, "filters"):
            for filter_item in filters.filters:  # type: ignore[attr-defined]
                if hasattr(filter_item, "key") and hasattr(filter_item, "value"):
                    key = getattr(filter_item, "key")
                    value = getattr(filter_item, "value")
                    result[key] = value
        return result or None

    def _build_query_result(
        self,
        docs_with_scores: list[tuple["Document", float]],
        score_transform,
    ) -> VectorStoreQueryResult:
        nodes: list[TextNode] = []
        similarities: list[float] = []
        ids: list[str] = []

        for tiny_doc, score in docs_with_scores:
            node_id = str(hash(tiny_doc.page_content))
            node = TextNode(
                text=tiny_doc.page_content,
                metadata=tiny_doc.metadata or {},
                id_=node_id,
                relationships={},
            )
            nodes.append(node)
            similarities.append(score_transform(score))
            ids.append(node_id)

        return VectorStoreQueryResult(nodes=nodes, similarities=similarities, ids=ids)

    def query(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResult:
        """Support dense, keyword, or hybrid lookups based on the requested mode."""

        filter_dict = self._filters_to_dict(query.filters)
        mode = getattr(query, "mode", VectorStoreQueryMode.DEFAULT)
        mode_value = getattr(mode, "value", mode)
        normalized_mode = str(mode_value).lower() if mode_value else "default"

        keyword_modes = {
            VectorStoreQueryMode.SPARSE.value,
            VectorStoreQueryMode.TEXT_SEARCH.value,
        }
        hybrid_modes = {
            VectorStoreQueryMode.HYBRID.value,
            VectorStoreQueryMode.SEMANTIC_HYBRID.value,
        }

        if normalized_mode in keyword_modes:
            if not query.query_str:
                raise ValueError("Keyword search requires query_str")
            results = self._collection.keyword_search(
                query.query_str,
                k=query.similarity_top_k,
                filter=filter_dict,
            )
            return self._build_query_result(results, lambda score: 1.0 / (1.0 + score))

        if normalized_mode in hybrid_modes:
            if not query.query_str:
                raise ValueError("Hybrid search requires query_str")
            results = self._collection.hybrid_search(
                query.query_str,
                k=query.similarity_top_k,
                filter=filter_dict,
                query_vector=query.query_embedding,
            )
            return self._build_query_result(results, lambda score: float(score))

        # Fallback to dense/vector search
        query_emb = query.query_embedding
        if query_emb is None:
            if query.query_str:
                query_input: str | list[float] = query.query_str
            else:
                raise ValueError("Either query_embedding or query_str must be provided")
        else:
            query_input = query_emb

        results = self._collection.similarity_search(
            query=query_input,
            k=query.similarity_top_k,
            filter=filter_dict,
        )
        return self._build_query_result(results, lambda distance: 1 - distance)
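
A minimal usage sketch; the node text and 4-dimensional embedding are illustrative only (any dimension works, as long as it is consistent within the collection):

from llama_index.core.schema import TextNode

from simplevecdb.integrations.llamaindex import SimpleVecDBLlamaStore

store = SimpleVecDBLlamaStore(db_path=":memory:")
node = TextNode(
    text="SQLite is a file-based database.",
    embedding=[0.1, 0.2, 0.3, 0.4],  # pre-computed vector, hypothetical values
)
ids = store.add([node])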

client property

Return the underlying client (our VectorDB).

store_text property

Whether the store keeps text content.

add(nodes, **kwargs)

Add nodes with embeddings.

Parameters:

    Name      Type                Description                         Default
    nodes     Sequence[BaseNode]  Sequence of LlamaIndex BaseNodes.   required
    **kwargs  Any                 Unused.                             {}

Returns:

    Type       Description
    list[str]  List of node IDs.

Source code in src/simplevecdb/integrations/llamaindex.py
def add(self, nodes: Sequence[BaseNode], **kwargs: Any) -> list[str]:
    """
    Add nodes with embeddings.

    Args:
        nodes: Sequence of LlamaIndex BaseNodes.
        **kwargs: Unused.

    Returns:
        List of node IDs.
    """
    texts = [node.get_content() for node in nodes]
    metadatas = [node.metadata for node in nodes]

    # Extract embeddings, ensuring all are valid or set to None
    embeddings = None
    if nodes and nodes[0].embedding is not None:
        # Ensure all embeddings are present (not None)
        emb_list = []
        all_have_embeddings = True
        for node in nodes:
            if node.embedding is None:
                all_have_embeddings = False
                break
            emb_list.append(node.embedding)

        if all_have_embeddings:
            embeddings = emb_list

    # Add to DB and get internal IDs
    internal_ids = self._collection.add_texts(texts, metadatas, embeddings)

    # Track mapping from internal ID to node ID
    node_ids = []
    for i, node in enumerate(nodes):
        internal_id = internal_ids[i]
        node_id = node.node_id or str(internal_id)
        self._id_map[internal_id] = node_id
        node_ids.append(node_id)

    return node_ids

delete(ref_doc_id, **delete_kwargs)

Delete by ref_doc_id (node ID).

Parameters:

    Name             Type  Description              Default
    ref_doc_id       str   The node ID to delete.   required
    **delete_kwargs  Any   Unused.                  {}

Source code in src/simplevecdb/integrations/llamaindex.py
def delete(self, ref_doc_id: str, **delete_kwargs: Any) -> None:
    """
    Delete by ref_doc_id (node ID).

    Args:
        ref_doc_id: The node ID to delete.
        **delete_kwargs: Unused.
    """
    # Find internal ID from node ID
    internal_id = None
    for int_id, node_id in self._id_map.items():
        if node_id == ref_doc_id:
            internal_id = int_id
            break

    if internal_id is not None:
        self._collection.delete_by_ids([internal_id])
        del self._id_map[internal_id]

delete_nodes(node_ids=None, filters=None, **delete_kwargs)

Delete nodes from vector store.

Parameters:

    Name             Type                     Description                   Default
    node_ids         list[str] | None         List of node IDs to delete.   None
    filters          MetadataFilters | None   Metadata filters (unused).    None
    **delete_kwargs  Any                      Unused.                       {}

Source code in src/simplevecdb/integrations/llamaindex.py
def delete_nodes(
    self,
    node_ids: list[str] | None = None,
    filters: MetadataFilters | None = None,
    **delete_kwargs: Any,
) -> None:
    """
    Delete nodes from vector store.

    Args:
        node_ids: List of node IDs to delete.
        filters: Metadata filters (unused).
        **delete_kwargs: Unused.
    """
    if node_ids:
        for node_id in node_ids:
            self.delete(node_id)
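
A one-line continuation of the add() sketch above; delete_nodes simply calls delete for each node ID:

store.delete_nodes(node_ids=ids)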

query(query, **kwargs)

Support dense, keyword, or hybrid lookups based on the requested mode.

Source code in src/simplevecdb/integrations/llamaindex.py
def query(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResult:
    """Support dense, keyword, or hybrid lookups based on the requested mode."""

    filter_dict = self._filters_to_dict(query.filters)
    mode = getattr(query, "mode", VectorStoreQueryMode.DEFAULT)
    mode_value = getattr(mode, "value", mode)
    normalized_mode = str(mode_value).lower() if mode_value else "default"

    keyword_modes = {
        VectorStoreQueryMode.SPARSE.value,
        VectorStoreQueryMode.TEXT_SEARCH.value,
    }
    hybrid_modes = {
        VectorStoreQueryMode.HYBRID.value,
        VectorStoreQueryMode.SEMANTIC_HYBRID.value,
    }

    if normalized_mode in keyword_modes:
        if not query.query_str:
            raise ValueError("Keyword search requires query_str")
        results = self._collection.keyword_search(
            query.query_str,
            k=query.similarity_top_k,
            filter=filter_dict,
        )
        return self._build_query_result(results, lambda score: 1.0 / (1.0 + score))

    if normalized_mode in hybrid_modes:
        if not query.query_str:
            raise ValueError("Hybrid search requires query_str")
        results = self._collection.hybrid_search(
            query.query_str,
            k=query.similarity_top_k,
            filter=filter_dict,
            query_vector=query.query_embedding,
        )
        return self._build_query_result(results, lambda score: float(score))

    # Fallback to dense/vector search
    query_emb = query.query_embedding
    if query_emb is None:
        if query.query_str:
            query_input: str | list[float] = query.query_str
        else:
            raise ValueError("Either query_embedding or query_str must be provided")
    else:
        query_input = query_emb

    results = self._collection.similarity_search(
        query=query_input,
        k=query.similarity_top_k,
        filter=filter_dict,
    )
    return self._build_query_result(results, lambda distance: 1 - distance)
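
A sketch of the three query modes, assuming the store populated in the add() example. VectorStoreQuery and VectorStoreQueryMode come from llama_index.core.vector_stores.types:

from llama_index.core.vector_stores.types import (
    VectorStoreQuery,
    VectorStoreQueryMode,
)

# Dense (default): a query embedding with the stored vectors' dimension.
dense = store.query(
    VectorStoreQuery(query_embedding=[0.1, 0.2, 0.3, 0.4], similarity_top_k=2)
)

# Keyword (BM25): only query_str is required.
sparse = store.query(
    VectorStoreQuery(
        query_str="file-based database",
        mode=VectorStoreQueryMode.TEXT_SEARCH,
        similarity_top_k=2,
    )
)

# Hybrid: query_str drives BM25; query_embedding, if given, feeds the vector leg.
hybrid = store.query(
    VectorStoreQuery(
        query_str="file-based database",
        query_embedding=[0.1, 0.2, 0.3, 0.4],
        mode=VectorStoreQueryMode.HYBRID,
        similarity_top_k=2,
    )
)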