Use AsyncAnthropic to call Claude in async Python applications. Concurrent requests with asyncio.gather, async streaming, FastAPI integration, and performance tips.
The Anthropic Python SDK ships an AsyncAnthropic client with the same interface as the synchronous client, except that its request methods return coroutines you must await. Use it whenever your code runs inside an asyncio event loop — FastAPI, Starlette, or any other event-loop-based server.
import asyncio
import anthropic
async def main() -> None:
    """Send a single question to Claude and print the text of the reply."""
    # Using the client as an async context manager closes its underlying
    # HTTP connections on exit instead of leaving that to garbage collection.
    async with anthropic.AsyncAnthropic() as client:
        message = await client.messages.create(
            model="claude-sonnet-4-6",
            max_tokens=1024,
            messages=[{"role": "user", "content": "What is the capital of France?"}],
        )
    # The reply text is read from the first content block of the response.
    print(message.content[0].text)


# Guarded so that importing this module does not trigger an API call.
if __name__ == "__main__":
    asyncio.run(main())
Concurrency is the main reason to use async: fire N requests simultaneously with asyncio.gather and collect the results, rather than waiting for each one sequentially.
import asyncio, anthropic
# Shared async client, created once at import time and reused by summarize().
client = anthropic.AsyncAnthropic()


async def summarize(text: str, label: str) -> str:
    """Request a two-sentence summary of ``text`` and tag it with ``label``.

    Returns:
        The string ``"<label>: <summary>"``, where the summary is the text of
        the first content block in the model's response.
    """
    user_turn = {"role": "user", "content": f"Summarise in 2 sentences: {text}"}
    response = await client.messages.create(
        model="claude-haiku-4-5",
        max_tokens=256,
        messages=[user_turn],
    )
    summary = response.content[0].text
    return f"{label}: {summary}"
async def main() -> None:
    """Summarise three sample documents concurrently and print each result."""
    docs = [
        ("Long document one...", "doc-1"),
        ("Long document two...", "doc-2"),
        ("Long document three...", "doc-3"),
    ]
    # gather() schedules every summarize() call at once and returns the
    # results in the same order as the inputs.
    results = await asyncio.gather(
        *(summarize(text, label) for text, label in docs)
    )
    for line in results:
        print(line)


# Guarded so that importing this module does not trigger API calls.
if __name__ == "__main__":
    asyncio.run(main())
import asyncio, anthropic
# Shared async client, created once at import time and reused by
# stream_response().
client = anthropic.AsyncAnthropic()


async def stream_response() -> None:
    """Stream a poem from Claude, printing each text chunk as it arrives."""
    async with client.messages.stream(
        model="claude-sonnet-4-6",
        max_tokens=1024,
        messages=[{"role": "user", "content": "Write a poem about asyncio."}],
    ) as stream:
        async for text in stream.text_stream:
            # Flush on every chunk so output appears incrementally.
            print(text, end="", flush=True)
    print()  # newline at end


# Guarded so that importing this module does not trigger an API call.
if __name__ == "__main__":
    asyncio.run(stream_response())
import anthropic
from fastapi import FastAPI, HTTPException
from fastapi.responses import StreamingResponse
app = FastAPI()
# One async client for the whole app, shared by the request handlers.
client = anthropic.AsyncAnthropic()


@app.post("/summarize")
async def summarize(body: dict):
    """Summarise the ``text`` field of the JSON request body.

    Returns:
        ``{"summary": ...}`` holding the text of the first content block in
        the model's response.

    Raises:
        HTTPException: 422 if the request body has no ``text`` key.
    """
    try:
        text = body["text"]
    except KeyError:
        # Report a client error instead of letting the KeyError surface as
        # an unhandled exception (HTTP 500).
        raise HTTPException(
            status_code=422, detail="Missing required field: 'text'"
        ) from None

    message = await client.messages.create(
        model="claude-haiku-4-5",
        max_tokens=512,
        messages=[{"role": "user", "content": f"Summarise: {text}"}],
    )
    return {"summary": message.content[0].text}
@app.post("/stream")
async def stream_chat(body: dict):
    """Stream Claude's reply to the ``prompt`` field of the request body.

    Returns:
        A ``StreamingResponse`` (``text/plain``) that yields text chunks as
        the model produces them.

    Raises:
        HTTPException: 422 if the request body has no ``prompt`` key.
    """
    # Look the prompt up eagerly. Inside generate() the lookup would not run
    # until the response body is being iterated, i.e. after the response has
    # already started, so a missing key could not be reported as a clean
    # error status.
    try:
        prompt = body["prompt"]
    except KeyError:
        raise HTTPException(
            status_code=422, detail="Missing required field: 'prompt'"
        ) from None

    async def generate():
        """Yield each text chunk from the model's streamed reply."""
        async with client.messages.stream(
            model="claude-sonnet-4-6",
            max_tokens=1024,
            messages=[{"role": "user", "content": prompt}],
        ) as stream:
            async for text in stream.text_stream:
                yield text

    return StreamingResponse(generate(), media_type="text/plain")
import asyncio, anthropic
# Shared async client, created once at import time and reused by
# bounded_call().
client = anthropic.AsyncAnthropic()


async def bounded_call(sem: asyncio.Semaphore, prompt: str) -> str:
    """Send ``prompt`` to Claude while holding ``sem`` and return the reply.

    The semaphore is held for the duration of the API request, so the number
    of requests in flight is capped by the semaphore's initial value.

    Returns:
        The text of the first content block in the model's response.
    """
    user_turn = {"role": "user", "content": prompt}
    async with sem:
        reply = await client.messages.create(
            model="claude-haiku-4-5",
            max_tokens=256,
            messages=[user_turn],
        )
    return reply.content[0].text
async def main(prompts: list[str], concurrency: int = 10) -> list[str]:
    """Run ``bounded_call`` for every prompt with a cap on parallelism.

    Args:
        prompts: Prompts to send; one request is made per entry.
        concurrency: Maximum number of requests allowed in flight at once.

    Returns:
        The reply texts, in the same order as ``prompts``.

    Raises:
        ValueError: If ``concurrency`` is less than 1.
    """
    # Semaphore(0) is accepted by asyncio but can never be acquired, so every
    # call would wait forever; fail fast instead.
    if concurrency < 1:
        raise ValueError(f"concurrency must be >= 1, got {concurrency}")
    sem = asyncio.Semaphore(concurrency)
    return await asyncio.gather(*(bounded_call(sem, p) for p in prompts))


# Guarded so that importing this module does not trigger API calls.
if __name__ == "__main__":
    results = asyncio.run(
        main(["Summarise X", "Classify Y", "Extract Z"], concurrency=5)
    )
For cost modelling on concurrent async workloads, use the Claude Cost Calculator. The Prompt-Pricing Recommender helps choose Haiku vs Sonnet for high-concurrency pipelines.