Node.js code examples for streaming Claude API responses. Covers basic SSE streaming, Express integration, error handling, and token-by-token output using @anthropic-ai/sdk.
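Streaming Claude responses in Node.js lets you start displaying output before generation completes — cutting perceived latency from the several seconds a full response can take down to roughly the time it takes the first token to arrive. The official Anthropic SDK handles SSE parsing transparently, so you work with events rather than raw bytes.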
npm install @anthropic-ai/sdk
# Set ANTHROPIC_API_KEY in your environment or .env
import Anthropic from "@anthropic-ai/sdk";
// Basic streaming: print each text chunk to stdout as soon as it arrives.
// The client is constructed without arguments, so the API key is expected
// to be available as ANTHROPIC_API_KEY in the environment.
const client = new Anthropic();

// Request parameters kept in one place so they are easy to tweak.
const request = {
  model: "claude-sonnet-4-6",
  max_tokens: 2048,
  messages: [
    { role: "user", content: "Write a Node.js HTTP server from scratch." },
  ],
};

const stream = client.messages.stream(request);

// Writes one streamed text chunk straight to the terminal (no trailing newline).
function printChunk(chunk) {
  process.stdout.write(chunk);
}
stream.on("text", printChunk);

// Wait here until the whole response has been received.
await stream.finalMessage();
import express from "express";
import Anthropic from "@anthropic-ai/sdk";
const app = express();
app.use(express.json());
const client = new Anthropic();

/**
 * Serializes a payload as a single Server-Sent Events frame.
 *
 * An SSE event is only dispatched to the client once a blank line is seen,
 * so every frame must end with "\n\n" (a lone "\n" leaves the event buffered).
 *
 * @param {unknown} payload - JSON-serializable value sent as the event data.
 * @returns {string} The encoded `data:` frame.
 */
function toSseEvent(payload) {
  return `data: ${JSON.stringify(payload)}\n\n`;
}

/**
 * POST /chat — relays a Claude response to the caller as an SSE stream.
 *
 * Expects a JSON body of the form `{ messages: [...] }`. Each text chunk is
 * sent as a `data: {"text": "..."}` event. If the upstream request fails, an
 * `event: error` frame is sent before the response is closed.
 */
app.post("/chat", async (req, res) => {
  const messages = req.body?.messages;
  if (!Array.isArray(messages) || messages.length === 0) {
    // Reject before switching to SSE so the caller gets a regular JSON error.
    res.status(400).json({ error: "messages must be a non-empty array" });
    return;
  }

  res.setHeader("Content-Type", "text/event-stream");
  res.setHeader("Cache-Control", "no-cache");
  res.setHeader("Connection", "keep-alive");

  const stream = client.messages.stream({
    model: "claude-sonnet-4-6",
    max_tokens: 1024,
    messages,
  });

  // If the client goes away mid-response, stop the upstream generation
  // instead of producing tokens nobody will read.
  res.on("close", () => {
    if (!res.writableEnded) {
      stream.abort();
    }
  });

  stream.on("text", (text) => res.write(toSseEvent({ text })));

  try {
    await stream.finalMessage();
  } catch (err) {
    // Without this catch the rejection would escape the async handler and
    // the response would never be closed.
    console.error("Claude stream failed:", err);
    if (!res.destroyed) {
      res.write(`event: error\n${toSseEvent({ error: "stream failed" })}`);
    }
  } finally {
    if (!res.writableEnded) {
      res.end();
    }
  }
});

app.listen(3000);
// Collect the streamed text while it arrives, then report the complete
// response together with the token usage from the final message.
const stream = client.messages.stream({ /* ... */ });

// Chunks are gathered in arrival order and joined once the stream is done.
const chunks = [];
stream.on("text", (chunk) => {
  chunks.push(chunk);
});

const final = await stream.finalMessage();
const fullText = chunks.join("");
const { input_tokens: inputTokens, output_tokens: outputTokens } = final.usage;

console.log("Complete response:", fullText);
console.log("Total tokens:", inputTokens + outputTokens);
// Abort the streaming request if it has not finished within 30 seconds.
const controller = new AbortController();
const abortRequest = () => controller.abort();
const timeout = setTimeout(abortRequest, 30_000);

try {
  const params = { model: "claude-sonnet-4-6", max_tokens: 1024, messages };
  // The abort signal is passed as a per-request option, separate from the
  // message parameters.
  const options = { signal: controller.signal };

  const stream = client.messages.stream(params, options);
  stream.on("text", (chunk) => {
    process.stdout.write(chunk);
  });
  await stream.finalMessage();
} finally {
  // Always cancel the timer, whether the stream finished or threw.
  clearTimeout(timeout);
}
// Conversation transcript shared across turns; entries are { role, content }.
const history = [];

/**
 * Sends one user turn to Claude, streams the reply to stdout, and records
 * both sides of the exchange in `history`.
 *
 * If the request fails, the user turn is removed from `history` again so a
 * retry does not resend it as a duplicate, and the original error is rethrown.
 *
 * @param {string} userMessage - The user's message for this turn.
 * @returns {Promise<string>} The full assistant reply text.
 * @throws Whatever error the streaming request rejects with.
 */
async function chat(userMessage) {
  const userTurn = { role: "user", content: userMessage };
  history.push(userTurn);

  let assistantText = "";
  try {
    const stream = client.messages.stream({
      model: "claude-sonnet-4-6",
      max_tokens: 1024,
      messages: history,
    });
    stream.on("text", (t) => {
      assistantText += t;
      process.stdout.write(t);
    });
    await stream.finalMessage();
  } catch (err) {
    // Remove this exact turn (not just the last entry) in case another
    // turn was appended while the request was in flight.
    const index = history.lastIndexOf(userTurn);
    if (index !== -1) {
      history.splice(index, 1);
    }
    throw err;
  }

  history.push({ role: "assistant", content: assistantText });
  return assistantText;
}
// Demo conversation: turns are awaited one after another so each call to
// chat() finishes before the next prompt is sent.
for (const prompt of ["Hello!", "Now write a haiku about Node.js."]) {
  await chat(prompt);
}
Streaming and non-streaming requests are billed at the same per-token rates. Use the Claude Cost Calculator to estimate what your Node.js app will cost to run, and the Prompt-Pricing Recommender to choose the right model tier.