How to stream Claude API responses in TypeScript using the Anthropic SDK. Includes typed event handlers, SSE parsing, Next.js integration, and error handling patterns.
The Claude API supports server-sent events (SSE) streaming so your TypeScript application can display tokens as they arrive rather than waiting for the full response. This guide shows the production patterns.
import Anthropic from "@anthropic-ai/sdk";
import type { MessageStreamEvent } from "@anthropic-ai/sdk/resources/messages";
// SDK client, constructed with no explicit options.
const client = new Anthropic();

// Open a streaming request for a single user message.
const stream = client.messages.stream({
  model: "claude-sonnet-4-6",
  max_tokens: 1024,
  messages: [{ role: "user", content: "Explain TypeScript generics." }],
});

// Echo each text chunk to stdout as soon as the "text" event delivers it.
stream.on("text", (text: string) => {
  process.stdout.write(text);
});

// Wait for the stream to complete, then print the usage reported on the final message.
// The leading "\n" escape moves the log onto its own line after the streamed text.
const finalMessage = await stream.finalMessage();
console.log("\nUsage:", finalMessage.usage);
// Consume the stream event by event, switching on the event's `type` tag so each
// case sees the payload fields that belong to that event.
for await (const event of stream as AsyncIterable<MessageStreamEvent>) {
  switch (event.type) {
    case "message_start":
      console.log("Model:", event.message.model);
      break;
    case "content_block_delta":
      // Only text deltas are printed; other delta kinds are ignored here.
      if (event.delta.type === "text_delta") {
        process.stdout.write(event.delta.text);
      }
      break;
    case "message_delta":
      // Output-token usage is logged only when the turn ended normally.
      if (event.delta.stop_reason === "end_turn") {
        console.log("\nTokens used:", event.usage.output_tokens);
      }
      break;
  }
}
// app/api/chat/route.ts
import Anthropic from "@anthropic-ai/sdk";
import { NextRequest } from "next/server";
// Module-scope Anthropic client used by the POST handler below; constructed with no explicit options.
const client = new Anthropic();
/**
 * Streams a Claude reply back to the caller as plain UTF-8 text.
 *
 * Expects a JSON request body of the form `{ "message": string }`.
 *
 * @param req - Incoming request whose JSON body carries the user message.
 * @returns A 400 response when the body is not a JSON object or `message` is
 *   not a non-empty string; otherwise a streaming response whose chunks are
 *   the text pieces emitted by the SDK stream.
 */
export async function POST(req: NextRequest) {
  let message: unknown;
  try {
    ({ message } = await req.json());
  } catch {
    // Covers both unparsable JSON and a body (e.g. `null`) that cannot be destructured.
    return new Response("Request body must be a JSON object.", { status: 400 });
  }
  if (typeof message !== "string" || message.trim() === "") {
    return new Response('"message" must be a non-empty string.', { status: 400 });
  }
  // A const copy keeps the narrowed `string` type inside the closures below.
  const prompt = message;

  const encoder = new TextEncoder();
  // Lets a cancelled response body stop the upstream request instead of
  // letting it run to completion unobserved.
  const abortController = new AbortController();

  const readable = new ReadableStream<Uint8Array>({
    async start(controller) {
      try {
        const stream = client.messages.stream(
          {
            model: "claude-sonnet-4-6",
            max_tokens: 1024,
            messages: [{ role: "user", content: prompt }],
          },
          { signal: abortController.signal }
        );
        stream.on("text", (text) => {
          // Enqueueing after the consumer cancelled would throw, so skip late chunks.
          if (abortController.signal.aborted) return;
          controller.enqueue(encoder.encode(text));
        });
        await stream.finalMessage();
        controller.close();
      } catch (err: unknown) {
        // Surface upstream failures to the reader rather than leaving the body hanging.
        controller.error(err);
      }
    },
    cancel() {
      abortController.abort();
    },
  });

  // No "Transfer-Encoding" header: it is hop-by-hop and chosen by the server runtime.
  return new Response(readable, {
    headers: {
      "Content-Type": "text/plain; charset=utf-8",
    },
  });
}
// Streaming request that makes one tool available to the model.
const stream = client.messages.stream({
  model: "claude-sonnet-4-6",
  max_tokens: 1024,
  tools: [myTool],
  messages,
});

// Report each tool invocation by name, then echo its JSON arguments as they stream in.
for await (const evt of stream) {
  switch (evt.type) {
    case "content_block_start":
      if (evt.content_block.type === "tool_use") {
        console.log("Tool invoked:", evt.content_block.name);
      }
      break;
    case "content_block_delta":
      if (evt.delta.type === "input_json_delta") {
        // Partial JSON fragments of the tool arguments, written as they arrive.
        process.stdout.write(evt.delta.partial_json);
      }
      break;
  }
}
// How long the request may run before it is cancelled.
const CANCEL_AFTER_MS = 3000;

// The controller's signal is handed to the SDK through the per-request options.
const controller = new AbortController();
const stream = client.messages.stream(
  {
    model: "claude-sonnet-4-6",
    max_tokens: 1024,
    messages,
  },
  { signal: controller.signal }
);

// Abort the in-flight request once the time budget elapses.
setTimeout(() => {
  controller.abort();
}, CANCEL_AFTER_MS);
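The Anthropic API charges identically for streaming and non-streaming requests — the same input and output token counts apply. The tradeoff is perceived latency versus implementation simplicity: streaming lets users see output as it arrives, but takes more code to handle than waiting for the full response. Use the Claude Cost Calculator to model token costs.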