Error Handling

Robust LLM applications handle transient API failures, provider outages, budget limits, and tool errors gracefully. This guide covers every error-handling pattern in SynapseKit.

Prerequisites

pip install synapsekit[openai,anthropic]

1. LLM retries with exponential backoff

Configure LLMConfig to automatically retry on transient errors (rate limits, 5xx responses, timeouts).

from synapsekit.llms.base import LLMConfig
from synapsekit.llms.openai import OpenAILLM


# Retry policy for transient failures: up to 3 retries on the listed HTTP
# status codes, waiting 1s, then 2s, then 4s (the 1.0s initial delay is
# doubled after each retry). Each individual request is capped at 30 seconds.
llm = OpenAILLM(
    config=LLMConfig(
        timeout=30.0,
        retry_on_status=[429, 500, 502, 503, 504],
        max_retries=3,
        retry_delay=1.0,
        retry_backoff=2.0,
    ),
    model="gpt-4o-mini",
)

import asyncio

async def main():
    """Ask a single question using the retry-enabled ``llm``."""
    # No explicit retry loop is needed here: if the first attempt hits a 429,
    # the client retries on its own according to its LLMConfig.
    response = await llm.generate("What is RAG?")
    print(response)
    # Expected output: RAG (Retrieval-Augmented Generation) combines a
    # retrieval system with a language model to answer questions...


asyncio.run(main())

Catching `MaxRetriesExceededError`

from synapsekit.exceptions import MaxRetriesExceededError
import asyncio


async def main():
    """Show how exhausted retries surface as ``MaxRetriesExceededError``."""
    try:
        answer = await llm.generate("What is RAG?")
    except MaxRetriesExceededError as exc:
        # The exception exposes the attempt count and the final underlying error.
        print(f"All {exc.attempts} attempts failed. Last error: {exc.last_error}")
        # Expected output: All 3 attempts failed. Last error: Rate limit exceeded


asyncio.run(main())

2. Provider fallback pattern

When one provider is down, automatically fail over to the next.

import asyncio
from synapsekit.llms.openai import OpenAILLM
from synapsekit.llms.anthropic import AnthropicLLM
from synapsekit.llms.ollama import OllamaLLM


async def generate_with_fallback(prompt: str) -> str:
    """Try each provider in order and return the first successful response.

    Args:
        prompt: Text passed unchanged to each provider's ``generate``.

    Returns:
        The result of the first ``generate`` call that does not raise.

    Raises:
        RuntimeError: If every provider raised. The last provider's exception
            is attached as ``__cause__`` so its traceback is preserved.
    """
    providers = [
        OpenAILLM(model="gpt-4o"),
        AnthropicLLM(model="claude-sonnet-4-6"),
        OllamaLLM(model="llama3"),  # Local model as the last resort
    ]
    last_error = None
    for llm in providers:
        try:
            return await llm.generate(prompt)
        except Exception as e:
            # Deliberately broad: any failure from one provider should move
            # on to the next one rather than abort the whole chain.
            print(f"Provider {llm.__class__.__name__} failed: {e}, trying next...")
            last_error = e
    # Chain the final failure so callers can inspect the root cause.
    raise RuntimeError(f"All providers failed. Last error: {last_error}") from last_error


async def main():
    """Send one prompt through the provider fallback chain and print the reply."""
    prompt = "Explain vector databases in one sentence"
    print(await generate_with_fallback(prompt))
    # Expected output (from whichever provider succeeds):
    # A vector database stores high-dimensional embeddings and enables
    # fast similarity search over them.


asyncio.run(main())

Using the built-in `FallbackLLM`

from synapsekit.llms import FallbackLLM
from synapsekit.llms.openai import OpenAILLM
from synapsekit.llms.anthropic import AnthropicLLM
import asyncio


async def main():
    """Query through ``FallbackLLM`` and print a line whenever it falls back."""

    def report_fallback(provider, err):
        # Callback handed to FallbackLLM as ``on_fallback``.
        print(f"Fell back from {provider}: {err}")

    chain = [
        OpenAILLM(model="gpt-4o"),
        AnthropicLLM(model="claude-sonnet-4-6"),
    ]
    llm = FallbackLLM(providers=chain, on_fallback=report_fallback)

    result = await llm.generate("What is the speed of light?")
    print(result)
    # Expected output: The speed of light in a vacuum is approximately
    # 299,792,458 metres per second (about 3 × 10^8 m/s).


asyncio.run(main())

3. BudgetExceededError handling

BudgetGuard raises BudgetExceededError when spending exceeds a configured threshold.

import asyncio
from synapsekit.llms.openai import OpenAILLM
from synapsekit.observability import CostTracker
from synapsekit.guardrails import BudgetGuard
from synapsekit.exceptions import BudgetExceededError


async def main():
    """Answer a list of questions under a $0.10 budget.

    Questions go to a budget-guarded ``gpt-4o`` client. When the guard raises
    ``BudgetExceededError`` the question is answered by an unguarded
    ``gpt-4o-mini`` client instead. ``total_spent`` only accumulates the cost
    of calls that went through the guarded client.
    """
    llm = OpenAILLM(model="gpt-4o")
    # Built once up front so the fallback client is not re-created for every
    # question that arrives after the budget has been exhausted.
    cheap_llm = OpenAILLM(model="gpt-4o-mini")
    tracker = CostTracker()

    # Raise BudgetExceededError when spend exceeds $0.10
    guard = BudgetGuard(tracker=tracker, budget_usd=0.10)
    guarded_llm = guard.wrap(llm)

    questions = [
        "Explain quantum computing in detail",
        "Write a 500-word essay on climate change",
        "List 50 Python best practices with examples",
    ]

    total_spent = 0.0
    for question in questions:
        try:
            answer = await guarded_llm.generate(question)
        except BudgetExceededError as e:
            print(f"Budget exceeded! Spent ${e.spent_usd:.4f} of ${e.budget_usd:.4f}")
            print("Switching to cheaper model...")
            # The fallback client is not wrapped by the guard, and its cost
            # is not added to total_spent.
            answer = await cheap_llm.generate(question)
        else:
            cost = tracker.last_call_cost_usd
            total_spent += cost
            print(f"Cost: ${cost:.4f} | {question[:40]}...")

    print(f"\nTotal spent: ${total_spent:.4f}")
    # Illustrative output (figures are examples, not measurements; real costs
    # depend on model pricing and response length):
    # Cost: $0.0412 | Explain quantum computing in detail...
    # Cost: $0.0538 | Write a 500-word essay on climate change...
    # Budget exceeded! Spent $0.1003 of $0.1000
    # Switching to cheaper model...
    #
    # Total spent: $0.0950


asyncio.run(main())

4. GraphInterrupt handling and resume

StateGraph supports human-in-the-loop interrupts. Handle GraphInterruptError to pause, collect input, and resume.

import asyncio
from synapsekit.graph import StateGraph
from synapsekit.graph.exceptions import GraphInterruptError


async def draft_content(state: dict) -> dict:
    """Build a placeholder draft from the ``input`` entry of the state."""
    topic = state["input"]
    return {"draft": f"Draft response for: {topic}"}


async def review_gate(state: dict) -> dict:
    """Interrupt the graph here so a human can review the draft.

    The value produced by ``interrupt`` is stored under ``approved``, and
    ``review_done`` is set to ``True``.
    """
    from synapsekit.graph import interrupt

    prompt = f"Please review this draft:\n\n{state['draft']}\n\nApprove? (yes/no)"
    decision = await interrupt(state, message=prompt)
    return {"approved": decision, "review_done": True}


async def publish(state: dict) -> dict:
    """Publish the draft if it was approved; otherwise report a rejection."""
    # A missing or falsy ``approved`` entry counts as a rejection.
    if not state.get("approved"):
        return {"status": "rejected"}
    return {"status": "published", "content": state["draft"]}


async def main():
    """Run the draft -> review -> publish graph, resuming once after an interrupt."""
    builder = StateGraph()
    stages = (
        ("draft", draft_content),
        ("review", review_gate),
        ("publish", publish),
    )
    for stage_name, handler in stages:
        builder.add_node(stage_name, handler)
    builder.add_edge("draft", "review")
    builder.add_edge("review", "publish")
    builder.set_entry_point("draft")
    # A checkpointer persists state so the run can be resumed later.
    compiled = builder.compile(checkpointer="sqlite")

    # Both invocations use the same thread id so the second continues the first.
    thread_id = "content-review-001"

    try:
        # Initial run: expected to stop at the review node's interrupt.
        result = await compiled.ainvoke(
            {"input": "Write a blog post about RAG"},
            config={"thread_id": thread_id},
        )
    except GraphInterruptError as pause:
        print(f"Paused: {pause.message}")
        # Expected output: Paused: Please review this draft: ...

        # Stand-in for a real reviewer's reply.
        human_input = "yes"
        print(f"Human input: {human_input}")

        # Second run: feed the reply back in to resume from the interrupt.
        result = await compiled.ainvoke(
            {"input": human_input},
            config={"thread_id": thread_id},
        )
        print(f"Final status: {result['status']}")
        # Expected output: Final status: published


asyncio.run(main())

5. Rate limiting

Protect against hitting provider rate limits by throttling requests.

import asyncio
from synapsekit.llms.openai import OpenAILLM
from synapsekit.utils import RateLimiter


async def main():
    """Ask the first five generated questions under rate and concurrency limits."""
    llm = OpenAILLM(model="gpt-4o-mini")
    limiter = RateLimiter(requests_per_minute=10)  # At most 10 requests per minute
    semaphore = asyncio.Semaphore(3)  # At most 3 requests in flight at once

    questions = [f"Question {i}: What is {i} + {i}?" for i in range(20)]
    batch = questions[:5]  # Only the first five questions are sent

    async def ask_bounded(question: str) -> str:
        # Take a concurrency slot first, then wait on the rate limiter.
        async with semaphore:
            async with limiter:
                return await llm.generate(question)

    results = await asyncio.gather(*map(ask_bounded, batch))
    for question, reply in zip(batch, results):
        print(f"{question} -> {reply[:40]}")


asyncio.run(main())

6. Tool error handling

Agents recover gracefully when tools return errors.

import asyncio
from synapsekit.agents import FunctionCallingAgent
from synapsekit.llms.openai import OpenAILLM
from synapsekit.tools import tool


@tool
async def risky_api_call(endpoint: str) -> str:
    """Call an external API endpoint.

    Args:
        endpoint: The API endpoint URL to call
    """
    # Failures are returned as "Error: ..." strings instead of being raised,
    # so the calling agent receives them as ordinary tool output.
    import asyncio

    import aiohttp

    # Single source of truth for both the request timeout and the message.
    timeout_seconds = 5
    try:
        async with aiohttp.ClientSession() as session:
            async with session.get(
                endpoint,
                timeout=aiohttp.ClientTimeout(total=timeout_seconds),
            ) as r:
                if r.status != 200:
                    # Return error as string — agent will handle it
                    return f"Error: API returned HTTP {r.status}"
                return await r.text()
    except aiohttp.ClientConnectorError:
        return f"Error: Could not connect to {endpoint}"
    except (asyncio.TimeoutError, TimeoutError):
        # aiohttp reports an expired timeout as asyncio.TimeoutError, which is
        # only an alias of the builtin TimeoutError from Python 3.11 onward;
        # naming both keeps the handler effective on older interpreters.
        return f"Error: Request timed out after {timeout_seconds} seconds"


async def main():
    """Run an agent whose only tool is pointed at a URL expected to fail."""
    agent = FunctionCallingAgent(
        llm=OpenAILLM(model="gpt-4o-mini"),
        tools=[risky_api_call],
        max_iterations=5,
        # Continue on tool errors instead of the default behaviour, "raise".
        on_tool_error="continue",
    )

    request = "Fetch data from https://nonexistent-api.example.com"
    outcome = await agent.run(request)
    print(outcome)
    # Expected output: I tried to fetch the data from that URL, but received
    # an error: Could not connect to https://nonexistent-api.example.com.
    # The service may be unavailable. Please check the URL or try again later.


asyncio.run(main())

7. Common exceptions reference

Exception	When raised	How to handle
`MaxRetriesExceededError`	All retry attempts failed	Fallback to another provider
`BudgetExceededError`	Spend exceeds `BudgetGuard` limit	Switch to cheaper model or stop
`GraphInterruptError`	`interrupt()` called in a node	Collect human input and resume
`RateLimitError`	Provider rate limit hit	`RateLimiter` or exponential backoff
`AuthenticationError`	Invalid API key	Check environment variable
`ContextLengthExceededError`	Prompt too long for model	Truncate context or use larger model
`ToolExecutionError`	Unhandled exception in tool	Wrap tool body in try/except
`EmbeddingError`	Embeddings API failed	Retry or fallback embeddings provider

Catching all SynapseKit errors

from synapsekit.exceptions import SynapseKitError
import asyncio


async def safe_generate(llm, prompt: str) -> str | None:
    """Return None instead of raising on any SynapseKit error."""
    try:
        return await llm.generate(prompt)
    except SynapseKitError as e:
        print(f"SynapseKit error ({type(e).__name__}): {e}")
        return None
    except Exception as e:
        print(f"Unexpected error: {type(e).__name__}: {e}")
        return None

8. Structured error logging

import asyncio
import logging
from synapsekit.llms.openai import OpenAILLM
from synapsekit.exceptions import MaxRetriesExceededError, BudgetExceededError

# Configure root logging at INFO level with lines formatted as
# "<time> <level> <logger name>: <message>".
logging.basicConfig(
    format="%(asctime)s %(levelname)s %(name)s: %(message)s",
    level=logging.INFO,
)
# Named logger for application messages.
logger = logging.getLogger("my_app")


async def robust_query(llm, prompt: str) -> str:
    """Call ``llm.generate`` and log the outcome before returning or re-raising.

    Args:
        llm: Object exposing an awaitable ``generate(prompt)`` method.
        prompt: Text passed to ``llm.generate``.

    Returns:
        The value produced by ``llm.generate(prompt)``.

    Raises:
        MaxRetriesExceededError: Re-raised after an error-level log entry.
        BudgetExceededError: Re-raised after a warning-level log entry.
        Exception: Any other failure is logged with its traceback and re-raised.
    """
    try:
        result = await llm.generate(prompt)
        logger.info("LLM call succeeded", extra={"prompt_len": len(prompt)})
        return result
    except MaxRetriesExceededError as e:
        # ``extra`` attaches these values to the log record as attributes.
        logger.error(
            "LLM retries exhausted",
            extra={"attempts": e.attempts, "last_error": str(e.last_error)},
        )
        raise
    except BudgetExceededError as e:
        logger.warning(
            "Budget exceeded",
            extra={"spent": e.spent_usd, "budget": e.budget_usd},
        )
        raise
    except Exception:
        # logger.exception records the active traceback itself, so the
        # exception object does not need to be bound to a name.
        logger.exception("Unexpected error in LLM call")
        raise

Summary

Pattern	Recommended for
`LLMConfig(max_retries=3)`	Every production LLM call
`FallbackLLM`	High-availability requirements
`BudgetGuard`	Any cost-sensitive application
`GraphInterruptError` + resume	Human-in-the-loop workflows
`RateLimiter`	Batch processing jobs
Return error strings from tools	Agent self-correction

Prerequisites​

1. LLM retries with exponential backoff​

Catching MaxRetriesExceededError

2. Provider fallback pattern​

Using the built-in FallbackLLM​

3. BudgetExceededError handling​

4. GraphInterrupt handling and resume​

5. Rate limiting​

6. Tool error handling​

7. Common exceptions reference​

Catching all SynapseKit errors​

8. Structured error logging​

Summary​