Skip to main content

AutoGen

Cache agent responses for both the new autogen-agentchat (0.4+) and legacy pyautogen (0.2.x) APIs.

# New API (0.4+) -- the 0.4+ examples below also import omnicache_ai and autogen_ext.models.openai
pip install omnicache-ai 'autogen-agentchat>=0.4' 'autogen-ext[openai]'

# Legacy API (0.2.x)
pip install 'omnicache-ai[autogen]'

4.1 autogen-agentchat 0.4+ -- AssistantAgent with async caching

Wrap an AssistantAgent so that repeated queries return instantly from cache.

import asyncio
from autogen_agentchat.agents import AssistantAgent
from autogen_agentchat.ui import Console
from autogen_ext.models.openai import OpenAIChatCompletionClient
from omnicache_ai import CacheManager, InMemoryBackend, CacheKeyBuilder
from omnicache_ai.adapters.autogen_adapter import AutoGenCacheAdapter

# Cache manager backed by InMemoryBackend; keys are built under the "ag" namespace.
manager = CacheManager(
    backend=InMemoryBackend(),
    key_builder=CacheKeyBuilder(namespace="ag"),
)

# Create the assistant, then wrap it in the adapter together with the manager.
model_client = OpenAIChatCompletionClient(model="gpt-4o-mini")
agent = AssistantAgent("assistant", model_client=model_client)
cached = AutoGenCacheAdapter(agent, manager)

async def main():
    """Ask the cached agent the same question twice and compare the results."""
    question = "What is the speed of light?"
    # First call — hits the model
    result1 = await cached.arun(question)
    # Second call — instant cache hit
    result2 = await cached.arun(question)
    print("Same result:", result1 == result2)


# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    asyncio.run(main())

4.2 autogen-agentchat 0.4+ -- RoundRobinGroupChat with cached agents

Wrap each agent in its own AutoGenCacheAdapter, then pass the underlying agents (each adapter's `_agent` attribute) to the team.

import asyncio
from autogen_agentchat.agents import AssistantAgent
from autogen_agentchat.teams import RoundRobinGroupChat
from autogen_agentchat.conditions import MaxMessageTermination
from autogen_ext.models.openai import OpenAIChatCompletionClient
from omnicache_ai import CacheManager, DiskBackend, CacheKeyBuilder
from omnicache_ai.adapters.autogen_adapter import AutoGenCacheAdapter

# One disk-backed cache manager shared by both adapters; the directory must be
# writable by the process running the example.
manager = CacheManager(
    backend=DiskBackend(directory="/var/cache/autogen"),
    key_builder=CacheKeyBuilder(namespace="ag"),
)
client = OpenAIChatCompletionClient(model="gpt-4o-mini")

# Each agent gets its own adapter; both adapters use the same manager and client.
researcher = AutoGenCacheAdapter(
    AssistantAgent(
        "researcher",
        model_client=client,
        system_message="Research and provide facts.",
    ),
    manager,
)
summarizer = AutoGenCacheAdapter(
    AssistantAgent(
        "summarizer",
        model_client=client,
        system_message="Summarize concisely.",
    ),
    manager,
)

async def main():
    """Run a two-agent round-robin team on one task and print the last message."""
    # NOTE(review): the team receives the wrapped agents through the adapters'
    # private `_agent` attribute, so the adapters themselves are not members of
    # the team. Whether replies produced inside team.run() go through the cache
    # depends on how AutoGenCacheAdapter hooks the wrapped agent — confirm.
    team = RoundRobinGroupChat(
        [researcher._agent, summarizer._agent],  # pass inner agents to team
        termination_condition=MaxMessageTermination(4),
    )
    result = await team.run(task="Explain transformer attention in 3 sentences.")
    print(result.messages[-1].content)


# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    asyncio.run(main())

4.3 pyautogen 0.2.x -- ConversableAgent (legacy)

The adapter also works with the legacy pyautogen ConversableAgent API.

from autogen import ConversableAgent, UserProxyAgent
from omnicache_ai import CacheManager, InMemoryBackend, CacheKeyBuilder
from omnicache_ai.adapters.autogen_adapter import AutoGenCacheAdapter

# Cache manager backed by InMemoryBackend; keys are built under the "ag-legacy" namespace.
manager = CacheManager(
    backend=InMemoryBackend(),
    key_builder=CacheKeyBuilder(namespace="ag-legacy"),
)

# Legacy ConversableAgent; the "..." api_key is a placeholder to fill in.
assistant = ConversableAgent(
    name="assistant",
    llm_config={
        "config_list": [{"model": "gpt-4o-mini", "api_key": "..."}],
    },
    system_message="You are a helpful assistant.",
)
cached_assistant = AutoGenCacheAdapter(assistant, manager)

# The same message list is sent twice so the second call can be served from cache.
messages = [{"role": "user", "content": "What is 10 * 10?"}]

reply1 = cached_assistant.generate_reply(messages)  # API call
reply2 = cached_assistant.generate_reply(messages)  # Cache hit

# Both calls are expected to yield the same reply.
assert reply1 == reply2
print(reply1)

4.4 AutoGen + LangChain embeddings cached together

Share a single CacheManager for both agent response caching and embedding caching.

import asyncio
from autogen_agentchat.agents import AssistantAgent
from autogen_ext.models.openai import OpenAIChatCompletionClient
from langchain_openai import OpenAIEmbeddings
from omnicache_ai import (
CacheManager, InMemoryBackend, CacheKeyBuilder,
EmbeddingCache, TTLPolicy,
)
from omnicache_ai.adapters.autogen_adapter import AutoGenCacheAdapter

# Single manager shared by the embedding cache and the agent adapter.
# TTL policy: default_ttl=3600 with a per-type override of 86400 for "embed"
# (presumably seconds — confirm against the TTLPolicy documentation).
manager = CacheManager(
    backend=InMemoryBackend(),
    key_builder=CacheKeyBuilder(namespace="combo"),
    ttl_policy=TTLPolicy(default_ttl=3600, per_type={"embed": 86400}),
)

# Embedding side: cache wrapper plus the LangChain embedder it will call.
emb_cache = EmbeddingCache(manager)
embedder = OpenAIEmbeddings(model="text-embedding-3-small")

# Agent side: assistant wrapped in the adapter, using the same manager.
agent = AssistantAgent(
    "assistant",
    model_client=OpenAIChatCompletionClient(model="gpt-4o-mini"),
)
cached_agent = AutoGenCacheAdapter(agent, manager)

async def main():
    """Fetch one embedding and one agent reply through the shared cache manager."""
    # Cache embeddings for retrieval context
    vec = emb_cache.get_or_compute(
        "transformer attention mechanism",
        lambda t: embedder.embed_query(t),
        model_id="text-embedding-3-small",
    )
    # NOTE(review): assumes get_or_compute returns an array-like exposing
    # `.shape`; embed_query itself returns a plain list of floats — confirm
    # that EmbeddingCache converts the value before relying on `.shape`.
    print("Embedding shape:", vec.shape)

    # Cache agent replies
    reply = await cached_agent.arun("What is the transformer attention mechanism?")
    print("Agent:", reply)


# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    asyncio.run(main())