Skip to content

Chat Completions (streaming)

This example shows how to consume the Server-Sent Events (SSE) stream returned by POST /v1/chat/completions when the request sets "stream": true.

bash
# -N (--no-buffer) makes curl print each SSE event as soon as it arrives
# instead of holding the streamed output in its buffer.
curl -N https://api.fastapi.ai/v1/chat/completions \
  -H "Authorization: Bearer $FAST_API_KEY" \
  -H "Content-Type: application/json" \
  -d '{
    "model": "gpt-4o-mini",
    "stream": true,
    "messages": [
      {"role": "user", "content": "Stream a short greeting."}
    ]
  }'
javascript
// Request a streamed chat completion. Top-level await means this file must run
// as an ES module (e.g. a .mjs file).
const res = await fetch('https://api.fastapi.ai/v1/chat/completions', {
  method: 'POST',
  headers: {
    Authorization: `Bearer ${process.env.FAST_API_KEY}`,
    'Content-Type': 'application/json',
  },
  body: JSON.stringify({
    model: 'gpt-4o-mini',
    stream: true,
    messages: [{ role: 'user', content: 'Stream a short greeting.' }],
  }),
});

// A non-2xx response carries an error body rather than an event stream.
if (!res.ok) {
  console.error('HTTP', res.status, await res.text());
  process.exit(1);
}

const reader = res.body.getReader();
const decoder = new TextDecoder();
// Holds the trailing, not-yet-complete event between reads.
let buffer = '';

const write = (s) => process.stdout.write(s);

// `return` is a SyntaxError at the top level of an ES module, so a labeled
// break is used to leave all loops once the [DONE] sentinel is seen.
readLoop: while (true) {
  const { value, done } = await reader.read();
  if (done) break;
  // { stream: true } keeps multi-byte characters split across reads intact.
  buffer += decoder.decode(value, { stream: true });

  // Events are separated by a blank line; the last piece may be incomplete,
  // so it is kept in the buffer for the next read.
  const parts = buffer.split('\n\n');
  buffer = parts.pop() ?? '';

  for (const part of parts) {
    const dataLines = part
      .split('\n')
      .filter((l) => l.startsWith('data:'))
      .map((l) => l.slice(5).trim());

    for (const data of dataLines) {
      if (!data) continue;
      if (data === '[DONE]') {
        // Stop reading and release the underlying stream.
        await reader.cancel();
        break readLoop;
      }
      try {
        const chunk = JSON.parse(data);
        const delta = chunk?.choices?.[0]?.delta?.content;
        if (typeof delta === 'string') {
          write(delta);
          continue;
        }
        // Chunks without a text delta are printed as-is.
        console.log(chunk);
      } catch {
        // Not JSON: print the raw payload.
        console.log(data);
      }
    }
  }
}
python
import os
import json
import requests

# stream=True makes requests hand back the body incrementally instead of
# downloading it all before returning.
resp = requests.post(
  "https://api.fastapi.ai/v1/chat/completions",
  headers={
    "Authorization": f"Bearer {os.environ['FAST_API_KEY']}",
    "Content-Type": "application/json",
  },
  json={
    "model": "gpt-4o-mini",
    "stream": True,
    "messages": [{"role": "user", "content": "Stream a short greeting."}],
  },
  stream=True,
)

# An error response is not an event stream; report it instead of silently
# skipping every line because none starts with "data:".
if not resp.ok:
  raise SystemExit(f"HTTP {resp.status_code}: {resp.text}")

# iter_lines() yields each line of the body as bytes; empty lines are the
# separators between events.
for line in resp.iter_lines():
  if not line:
    continue
  text = line.decode("utf-8")
  if not text.startswith("data:"):
    continue
  data = text[5:].strip()
  if not data:
    continue
  if data == "[DONE]":
    # End-of-stream sentinel: nothing more to read.
    break
  try:
    chunk = json.loads(data)
  except json.JSONDecodeError:
    # Not JSON: print the raw payload.
    print(data)
    continue
  # "choices" may be missing or empty and "delta" may be missing or null;
  # indexing blindly would raise IndexError/AttributeError mid-stream.
  choices = chunk.get("choices") if isinstance(chunk, dict) else None
  delta = (choices[0].get("delta") or {}).get("content") if choices else None
  if isinstance(delta, str):
    # flush so each delta shows up as it arrives rather than at exit.
    print(delta, end="", flush=True)
  else:
    print(chunk)

# Release the connection (the loop may have stopped before the body ended).
resp.close()
go
package main

import (
  "bufio"
  "bytes"
  "encoding/json"
  "fmt"
  "net/http"
  "os"
  "strings"
)

// ChatChunk holds the fields of a streamed chat completion chunk that this
// example reads; other JSON fields in the payload are ignored when decoding.
type ChatChunk struct {
  // Choices maps the "choices" array of the chunk.
  Choices []struct {
    // Delta maps the "delta" object of a choice.
    Delta struct {
      // Content maps the "content" string of the delta; it is left empty
      // when the field is absent.
      Content string `json:"content"`
    } `json:"delta"`
  } `json:"choices"`
}

// main posts a streaming chat completion request and prints the content
// deltas from the SSE response to stdout as they arrive. Data payloads that
// carry no content delta (or are not valid JSON) are printed raw on their own
// line. It panics on request or read errors and exits with status 1 on a
// non-2xx HTTP response.
func main() {
  payload := map[string]any{
    "model":  "gpt-4o-mini",
    "stream": true,
    "messages": []map[string]any{{
      "role": "user",
      "content": "Stream a short greeting.",
    }},
  }
  b, err := json.Marshal(payload)
  if err != nil {
    panic(err)
  }

  req, err := http.NewRequest("POST", "https://api.fastapi.ai/v1/chat/completions", bytes.NewReader(b))
  if err != nil {
    panic(err)
  }
  req.Header.Set("Authorization", "Bearer "+os.Getenv("FAST_API_KEY"))
  req.Header.Set("Content-Type", "application/json")

  resp, err := http.DefaultClient.Do(req)
  if err != nil {
    panic(err)
  }
  defer resp.Body.Close()

  scanner := bufio.NewScanner(resp.Body)
  // The default Scanner limit is 64 KiB per line; a longer data line would
  // stop the scan, so allow lines up to 1 MiB.
  scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024)

  // A non-2xx response is an error body, not an event stream: report it.
  if resp.StatusCode/100 != 2 {
    fmt.Fprintln(os.Stderr, "HTTP", resp.Status)
    for scanner.Scan() {
      fmt.Fprintln(os.Stderr, scanner.Text())
    }
    // os.Exit does not run deferred calls, so close the body explicitly.
    resp.Body.Close()
    os.Exit(1)
  }

  for scanner.Scan() {
    line := scanner.Text()
    // Only "data:" lines carry payloads; skip blank separators and other fields.
    if !strings.HasPrefix(line, "data:") {
      continue
    }
    data := strings.TrimSpace(strings.TrimPrefix(line, "data:"))
    if data == "" {
      continue
    }
    // End-of-stream sentinel.
    if data == "[DONE]" {
      return
    }
    var chunk ChatChunk
    if err := json.Unmarshal([]byte(data), &chunk); err == nil {
      if len(chunk.Choices) > 0 && chunk.Choices[0].Delta.Content != "" {
        fmt.Print(chunk.Choices[0].Delta.Content)
        continue
      }
    }
    fmt.Println(data)
  }
  // Scan returns false on both EOF and failure; surface read errors and
  // over-long lines instead of ending silently.
  if err := scanner.Err(); err != nil {
    panic(err)
  }
}
java
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

/**
 * Posts a streaming chat completion request and prints the content deltas from
 * the SSE response to stdout as they arrive.
 *
 * <p>To stay dependency-free, the content value is pulled out of each JSON
 * payload with a small string scanner instead of a JSON parser; production
 * code should use a real JSON library.
 */
public class Main {
  /**
   * Sends the request and consumes the event stream line by line.
   *
   * @param args unused
   * @throws Exception if sending the request or reading the response fails
   */
  public static void main(String[] args) throws Exception {
    String body = "{\"model\":\"gpt-4o-mini\",\"stream\":true,\"messages\":[{\"role\":\"user\",\"content\":\"Stream a short greeting.\"}]}";

    HttpRequest req = HttpRequest.newBuilder()
      .uri(URI.create("https://api.fastapi.ai/v1/chat/completions"))
      .header("Authorization", "Bearer " + System.getenv("FAST_API_KEY"))
      .header("Content-Type", "application/json")
      .POST(HttpRequest.BodyPublishers.ofString(body))
      .build();

    HttpResponse<java.io.InputStream> resp =
      HttpClient.newHttpClient().send(req, HttpResponse.BodyHandlers.ofInputStream());

    // A non-2xx response is an error body, not an event stream: report it.
    if (resp.statusCode() / 100 != 2) {
      try (java.io.InputStream in = resp.body()) {
        System.err.println("HTTP " + resp.statusCode() + " "
          + new String(in.readAllBytes(), java.nio.charset.StandardCharsets.UTF_8));
      }
      System.exit(1);
    }

    // Decode explicitly as UTF-8 rather than the platform default charset,
    // which would garble non-ASCII text on some systems.
    try (BufferedReader reader = new BufferedReader(
        new InputStreamReader(resp.body(), java.nio.charset.StandardCharsets.UTF_8))) {
      String line;
      while ((line = reader.readLine()) != null) {
        // Only "data:" lines carry payloads.
        if (!line.startsWith("data:")) continue;
        String data = line.substring(5).trim();
        if (data.isEmpty()) continue;
        // End-of-stream sentinel.
        if ("[DONE]".equals(data)) break;
        if (data.contains("\"delta\"") && data.contains("\"content\"")) {
          String content = extractContent(data);
          if (content != null && !content.isEmpty()) {
            System.out.print(content);
            continue;
          }
        }
        // No text delta found: print the raw payload.
        System.out.println(data);
      }
    }
  }

  /**
   * Returns the decoded value of the first {@code "content":"..."} string in
   * the given JSON text, honoring JSON backslash escapes.
   *
   * @param data raw JSON payload of one SSE data line
   * @return the unescaped content, or {@code null} if no such string is found
   *         or the string is unterminated or has a malformed escape
   */
  private static String extractContent(String data) {
    String key = "\"content\":\"";
    int start = data.indexOf(key);
    if (start < 0) return null;

    StringBuilder out = new StringBuilder();
    for (int i = start + key.length(); i < data.length(); i++) {
      char c = data.charAt(i);
      // An unescaped quote ends the string value.
      if (c == '"') return out.toString();
      if (c != '\\') {
        out.append(c);
        continue;
      }
      // Backslash: the next character selects the escape.
      if (++i >= data.length()) return null;
      char e = data.charAt(i);
      switch (e) {
        case 'n': out.append('\n'); break;
        case 't': out.append('\t'); break;
        case 'r': out.append('\r'); break;
        case 'b': out.append('\b'); break;
        case 'f': out.append('\f'); break;
        case 'u':
          // \\uXXXX: four hex digits must follow.
          if (i + 4 >= data.length()) return null;
          try {
            out.append((char) Integer.parseInt(data.substring(i + 1, i + 5), 16));
          } catch (NumberFormatException ex) {
            return null;
          }
          i += 4;
          break;
        default:
          // Covers \", \\ and \/ which stand for the character itself.
          out.append(e);
      }
    }
    return null;
  }
}

那年我双手插兜, 让bug稳如老狗