RAGPipeline API Reference

RAGConfig

from synapsekit import RAGConfig

| Parameter | Type | Default | Description |
| --- | --- | --- | --- |
| llm | BaseLLM | required | The language model used for answer generation |
| vector_store | VectorStore | required | The vector store used for chunk retrieval |
| k | int | 5 | Number of chunks to retrieve per query |
| prompt_template | PromptTemplate \| None | None | Custom prompt template; default stuffing template used if None |
| memory | BaseMemory \| None | None | Conversation memory backend for multi-turn chat |
| retriever | Retriever \| None | None | Custom retriever; overrides vector_store if provided |
| stream | bool | False | Whether to stream responses by default |
| chunk_size | int | 512 | Token size for document chunking via add() |
| chunk_overlap | int | 64 | Token overlap between adjacent chunks |
| splitter | BaseSplitter \| None | None | Custom splitter; overrides chunk_size/chunk_overlap if provided |
| temperature | float \| None | None | Override LLM temperature for this pipeline |
| metadata_fields | list[str] \| None | None | Document metadata fields to index alongside text |

from synapsekit import RAGConfig, OpenAILLM, InMemoryVectorStore, LLMConfig, SynapsekitEmbeddings

llm = OpenAILLM(LLMConfig(model="gpt-4o-mini", api_key="sk-..."))
store = InMemoryVectorStore(SynapsekitEmbeddings())
config = RAGConfig(llm=llm, vector_store=store, k=5)
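
The optional fields combine in the same constructor call. A sketch using only parameters documented in the table above; the specific values are illustrative, not recommendations:

config = RAGConfig(
    llm=llm,
    vector_store=store,
    k=8,                                   # retrieve more chunks per query
    chunk_size=256,                        # smaller chunks for add()
    chunk_overlap=32,
    temperature=0.2,                       # override the LLM temperature for this pipeline
    metadata_fields=["source", "author"],  # index these metadata fields alongside text
)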

RAGPipeline

from synapsekit import RAGPipeline

RAGPipeline(config: RAGConfig)

rag = RAGPipeline(config)

Short alias: from synapsekit import RAG (same class, shorter name).

add(documents, metadata=None)

Add documents synchronously. Documents are chunked and embedded before being stored.

def add(
    documents: list[str],
    metadata: list[dict] | None = None,
) -> None

| Parameter | Type | Default | Description |
| --- | --- | --- | --- |
| documents | list[str] | required | Raw document strings to chunk and embed |
| metadata | list[dict] \| None | None | Metadata dicts, one per document (not per chunk) |

rag.add(
    ["Document 1 text", "Document 2 text"],
    metadata=[{"source": "doc1.txt"}, {"source": "doc2.txt"}],
)

async aadd(documents, metadata=None)

Add documents asynchronously. Preferred in async contexts.

async def aadd(
    documents: list[str],
    metadata: list[dict] | None = None,
) -> None
await rag.aadd(["Document 1", "Document 2"])

query(query, **kwargs)

Query synchronously and return a string response.

def query(
    query: str,
    k: int | None = None,
    metadata_filter: dict | None = None,
) -> str

| Parameter | Type | Default | Description |
| --- | --- | --- | --- |
| query | str | required | The user question |
| k | int \| None | config.k | Override number of retrieved chunks |
| metadata_filter | dict \| None | None | Filter retrieved documents by metadata field values |

answer = rag.query("What is SynapseKit?")
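
Both parameters can be overridden per call. A sketch reusing the metadata from the add() example above; the filter is assumed to match metadata field values exactly, as described in the table:

answer = rag.query(
    "What does doc1.txt say?",
    k=3,                                     # override config.k for this query
    metadata_filter={"source": "doc1.txt"},  # restrict retrieval to matching chunks
)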

async aquery(query, **kwargs)

Query asynchronously. Returns a string response.

async def aquery(
    query: str,
    k: int | None = None,
    metadata_filter: dict | None = None,
) -> str
answer = await rag.aquery("What is SynapseKit?")

async astream(query, **kwargs)

Stream the response token by token. Returns an AsyncIterator[str].

async def astream(
    query: str,
    k: int | None = None,
    metadata_filter: dict | None = None,
) -> AsyncIterator[str]

| Parameter | Type | Default | Description |
| --- | --- | --- | --- |
| query | str | required | The user question |
| k | int \| None | config.k | Override number of retrieved chunks |
| metadata_filter | dict \| None | None | Filter retrieved documents by metadata |

async for token in rag.astream("Explain SynapseKit in detail"):
    print(token, end="", flush=True)
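
To keep the complete answer while printing tokens as they arrive, accumulate them in plain Python; no additional SynapseKit API is assumed:

tokens: list[str] = []
async for token in rag.astream("Explain SynapseKit in detail"):
    print(token, end="", flush=True)
    tokens.append(token)        # collect the streamed tokens
answer = "".join(tokens)        # full response after the stream ends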

save(path)

Persist the pipeline's vector store to disk.

def save(path: str) -> None
rag.save("/data/my_rag_index")

Raises NotImplementedError if the underlying vector store does not support persistence.
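
Since persistence support depends on the backing store, a defensive pattern (not a library requirement) is to guard the call:

try:
    rag.save("/data/my_rag_index")
except NotImplementedError:
    # e.g. a purely in-memory store; re-ingest documents on the next run instead
    print("vector store does not support persistence")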


load(path)

Load a previously saved vector store from disk.

def load(path: str) -> None
rag = RAG(config)
rag.load("/data/my_rag_index")

async get_relevant_documents(query, k=None, metadata_filter=None)

Retrieve documents without generating an answer. Returns list[dict] with "text", "score", "metadata" keys.

docs = await rag.get_relevant_documents("What is SynapseKit?", k=3)
for doc in docs:
print(f"Score: {doc['score']:.3f} | {doc['text'][:80]}")

Full example

import asyncio
from synapsekit import RAG, RAGConfig, OpenAILLM, InMemoryVectorStore, SynapsekitEmbeddings, LLMConfig

async def main():
    llm = OpenAILLM(LLMConfig(model="gpt-4o-mini", api_key="sk-..."))
    store = InMemoryVectorStore(SynapsekitEmbeddings())
    config = RAGConfig(llm=llm, vector_store=store, k=4)
    rag = RAG(config)

    await rag.aadd([
        "SynapseKit is an async-first Python library for building LLM applications.",
        "It supports RAG, agents, graph workflows, and multi-agent systems.",
    ])

    answer = await rag.aquery("What is SynapseKit?")
    print(answer)

    async for token in rag.astream("What is SynapseKit used for?"):
        print(token, end="", flush=True)

asyncio.run(main())

See also