{
  "title": "Cowinx text-answer latency benchmark",
  "measured_at": "2026-04-24",
  "metric": "provider time to first token (TTFT)",
  "scope": "Text-only requests from Cowinx relay machines; excludes speech segmentation and transcription time.",
  "runs_per_region": 5,
  "headline_basis": {
    "model": "beijing/qwen3.5-flash",
    "successful_runs": 5,
    "ttft_p50_ms": 379,
    "ttft_p95_ms": 492,
    "source": "docs/testing/bench.md"
  },
  "additional_raw_samples": {
    "model": "openai/gpt-5.4-nano",
    "regions": [
      {
        "relay_region": "sjc",
        "ttft_ms": [314, 327, 317, 292, 393],
        "average_ms": 329
      },
      {
        "relay_region": "iad",
        "ttft_ms": [354, 409, 420, 377, 414],
        "average_ms": 395
      }
    ]
  },
  "reported_summary": "Approximately 380 ms TTFT, rounded from the 379 ms text-only p50 result.",
  "limitations": [
    "This is not end-to-end question-to-answer latency.",
    "Network conditions, model routing, prompt size, and provider load can all change the results.",
    "Competitor latency figures on the comparison page are estimates unless an independent test is explicitly linked."
  ]
}
