# Vector Store Backends

All vector stores implement the VectorStore ABC and share the same interface.

from synapsekit.retrieval.base import VectorStore

class VectorStore(ABC):
    async def add(self, texts: list[str], metadata: list[dict] | None = None) -> None: ...
    async def search(self, query: str, top_k: int = 5, metadata_filter: dict | None = None) -> list[dict]: ...
    async def search_mmr(self, query: str, top_k: int = 5, lambda_mult: float = 0.5, fetch_k: int = 20) -> list[dict]: ...
    def save(self, path: str) -> None: ...  # optional
    def load(self, path: str) -> None: ...  # optional

InMemoryVectorStore

Zero-dependency, numpy-backed store. Best for development and small datasets.

from synapsekit import InMemoryVectorStore, SynapsekitEmbeddings

embeddings = SynapsekitEmbeddings()
store = InMemoryVectorStore(embeddings)

await store.add(["chunk one", "chunk two"], metadata=[{"src": "doc1"}, {"src": "doc2"}])

results = await store.search("my query", top_k=3)
# results[0] -> {"text": "...", "score": 0.92, "metadata": {...}}

# Metadata filtering
results = await store.search("my query", top_k=3, metadata_filter={"src": "doc1"})

# MMR search (diversity-aware)
results = await store.search_mmr("my query", top_k=3, lambda_mult=0.5)

# Persist to disk
store.save("my_store.npz")

# Reload
store2 = InMemoryVectorStore(embeddings)
store2.load("my_store.npz")

ChromaVectorStore

Persistent or ephemeral Chroma backend.

pip install synapsekit[chroma]
from synapsekit import SynapsekitEmbeddings
from synapsekit.retrieval.chroma import ChromaVectorStore

embeddings = SynapsekitEmbeddings()

# Ephemeral (in-memory Chroma)
store = ChromaVectorStore(embeddings, collection_name="my_docs")

# Persistent (saved to disk)
store = ChromaVectorStore(
    embeddings,
    collection_name="my_docs",
    persist_directory="./chroma_db",
)

await store.add(["doc text..."])
results = await store.search("query", top_k=5)

FAISSVectorStore

Facebook AI Similarity Search — fast exact and approximate nearest-neighbour search.

pip install synapsekit[faiss]
from synapsekit import SynapsekitEmbeddings
from synapsekit.retrieval.faiss import FAISSVectorStore

embeddings = SynapsekitEmbeddings()
store = FAISSVectorStore(embeddings)

await store.add(["doc one", "doc two"])
results = await store.search("query", top_k=5)

# Save / load
store.save("my_index") # writes my_index.faiss + my_index_texts.npy + my_index_meta.json
store.load("my_index")

QdrantVectorStore

Qdrant — production-grade vector database.

pip install synapsekit[qdrant]
from synapsekit import SynapsekitEmbeddings
from synapsekit.retrieval.qdrant import QdrantVectorStore

embeddings = SynapsekitEmbeddings()

store = QdrantVectorStore(
    embeddings,
    collection_name="my_docs",
    url="http://localhost:6333",
    api_key=None,  # set for Qdrant Cloud
)

await store.add(["doc text..."])
results = await store.search("query", top_k=5)

PineconeVectorStore

Pinecone — managed serverless vector database.

pip install synapsekit[pinecone]
from synapsekit import SynapsekitEmbeddings
from synapsekit.retrieval.pinecone import PineconeVectorStore

embeddings = SynapsekitEmbeddings()

store = PineconeVectorStore(
    embeddings,
    index_name="my-index",
    api_key="pcsk_...",
)

await store.add(["doc text..."])
results = await store.search("query", top_k=5)

Using a custom backend with Retriever

Any VectorStore subclass plugs straight into Retriever and RAGPipeline:

from synapsekit import SynapsekitEmbeddings, Retriever
from synapsekit.retrieval.chroma import ChromaVectorStore
from synapsekit.rag.pipeline import RAGConfig, RAGPipeline
from synapsekit.llm.openai import OpenAILLM
from synapsekit.llm.base import LLMConfig
from synapsekit.memory.conversation import ConversationMemory

embeddings = SynapsekitEmbeddings()
store = ChromaVectorStore(embeddings, persist_directory="./db")
retriever = Retriever(store, rerank=True)

pipeline = RAGPipeline(RAGConfig(
    llm=OpenAILLM(LLMConfig(model="gpt-4o-mini", api_key="sk-...")),
    retriever=retriever,
    memory=ConversationMemory(),
))

await pipeline.add("Your document text...")
answer = await pipeline.ask("Your question?")