> ## Documentation Index
> Fetch the complete documentation index at: https://docs.camb.ai/llms.txt
> Use this file to discover all available pages before exploring further.

# TTS with API

> Use the Camb.ai TTS API directly for maximum control and flexibility

export const AudioPlayer = ({src, title, prompt, badge, resetOnPause = true, exclusive = true}) => {
  const audioRef = React.useRef(null);
  const [playing, setPlaying] = React.useState(false);
  const [progress, setProgress] = React.useState(0);
  const [hovering, setHovering] = React.useState(false);
  const uid = React.useMemo(() => 'p' + Math.random().toString(36).slice(2, 8), []);
  React.useEffect(() => {
    const audio = audioRef.current;
    if (!audio) return;
    const onTime = () => setProgress(audio.duration ? audio.currentTime / audio.duration : 0);
    const onEnd = () => {
      setPlaying(false);
      setProgress(0);
    };
    audio.addEventListener('timeupdate', onTime);
    audio.addEventListener('ended', onEnd);
    const onGlobalStop = e => {
      if (e.detail !== uid && !audio.paused) {
        audio.pause();
        audio.currentTime = 0;
        setPlaying(false);
        setProgress(0);
      }
    };
    if (exclusive) window.addEventListener('audio-player-stop', onGlobalStop);
    return () => {
      audio.removeEventListener('timeupdate', onTime);
      audio.removeEventListener('ended', onEnd);
      if (exclusive) window.removeEventListener('audio-player-stop', onGlobalStop);
    };
  }, []);
  const toggle = e => {
    if (e) e.stopPropagation();
    const audio = audioRef.current;
    if (!audio) return;
    if (audio.paused) {
      if (exclusive) window.dispatchEvent(new CustomEvent('audio-player-stop', {
        detail: uid
      }));
      audio.currentTime = 0;
      setProgress(0);
      audio.play().catch(() => setPlaying(false));
      setPlaying(true);
    } else {
      audio.pause();
      if (resetOnPause) {
        audio.currentTime = 0;
        setProgress(0);
      }
      setPlaying(false);
    }
  };
  const circleSize = 52;
  const strokeWidth = 3;
  const radius = (circleSize - strokeWidth) / 2;
  const circumference = 2 * Math.PI * radius;
  const strokeDashoffset = circumference * (1 - progress);
  return <div style={{
    borderRadius: '14px',
    border: '1px solid rgba(236,85,18,0.12)',
    background: 'linear-gradient(160deg, rgba(236,85,18,0.04) 0%, transparent 40%, rgba(236,85,18,0.02) 100%)',
    overflow: 'hidden',
    marginBottom: '16px'
  }}>
      <div style={{
    height: '2px',
    background: 'linear-gradient(90deg, transparent, #EC5512 30%, #FF8A5C 50%, #EC5512 70%, transparent)'
  }} />
      <div style={{
    padding: '18px 22px 16px',
    display: 'flex',
    alignItems: 'center',
    gap: '16px'
  }}>
        <div style={{
    flexShrink: 0,
    position: 'relative',
    width: '52px',
    height: '52px',
    cursor: 'pointer'
  }} onMouseEnter={() => setHovering(true)} onMouseLeave={() => setHovering(false)}>
          <svg width={circleSize} height={circleSize} style={{
    position: 'absolute',
    top: 0,
    left: 0,
    transform: 'rotate(-90deg)'
  }}>
            <circle cx={26} cy={26} r={radius} fill="none" stroke="rgba(255,255,255,0.08)" strokeWidth={strokeWidth} />
            <circle cx={26} cy={26} r={radius} fill="none" stroke={`url(#${uid})`} strokeWidth={strokeWidth} strokeDasharray={circumference} strokeDashoffset={strokeDashoffset} strokeLinecap="round" style={{
    transition: 'stroke-dashoffset 0.15s ease'
  }} />
            <defs><linearGradient id={uid} x1="0%" y1="0%" x2="100%" y2="0%"><stop offset="0%" stopColor="#EC5512" /><stop offset="100%" stopColor="#FF8A5C" /></linearGradient></defs>
          </svg>
          <button onClick={toggle} style={{
    position: 'absolute',
    top: '50%',
    left: '50%',
    transform: 'translate(-50%, -50%)',
    width: '40px',
    height: '40px',
    borderRadius: '50%',
    background: hovering ? 'linear-gradient(145deg, #F06020, #EC5512)' : 'linear-gradient(145deg, #EC5512, #D44A0F)',
    border: 'none',
    cursor: 'pointer',
    display: 'flex',
    alignItems: 'center',
    justifyContent: 'center',
    boxShadow: hovering ? '0 2px 12px rgba(236,85,18,0.45)' : '0 1px 4px rgba(236,85,18,0.25)',
    transition: 'all 0.2s ease',
    padding: playing ? '0' : '0 0 0 1.5px'
  }}>
            {playing ? <svg width="14" height="14" viewBox="0 0 24 24" fill="white" stroke="none"><rect x="6" y="4" width="4" height="16" rx="1" /><rect x="14" y="4" width="4" height="16" rx="1" /></svg> : <svg width="14" height="14" viewBox="0 0 24 24" fill="white" stroke="none"><polygon points="7 3 21 12 7 21" /></svg>}
          </button>
        </div>
        <div style={{
    flex: 1,
    minWidth: 0
  }}>
          <div style={{
    display: 'flex',
    alignItems: 'center',
    gap: '8px',
    flexWrap: 'wrap',
    marginBottom: prompt ? '2px' : '4px'
  }}>
            <span style={{
    fontWeight: 600,
    fontSize: '14px',
    letterSpacing: '-0.01em'
  }}>{title}</span>
            {badge && <span style={{
    fontSize: '10px',
    fontWeight: 600,
    padding: '2px 7px',
    borderRadius: '4px',
    background: 'rgba(236,85,18,0.08)',
    color: '#EC5512',
    fontFamily: 'ui-monospace, SFMono-Regular, Menlo, monospace',
    letterSpacing: '0.02em',
    border: '1px solid rgba(236,85,18,0.12)',
    whiteSpace: 'nowrap'
  }}>{badge}</span>}
          </div>
          {prompt && <div style={{
    fontSize: '12.5px',
    opacity: 0.5,
    lineHeight: '1.4',
    fontStyle: 'italic'
  }}>"{prompt}"</div>}
        </div>
      </div>
      <audio ref={audioRef} preload="metadata" src={src} />
    </div>;
};

## Overview

While SDKs and frameworks provide convenience, sometimes you need direct control over API calls. This tutorial shows how to call the Camb.ai TTS API directly using HTTP requests.

### When to Use Direct API

* Building integrations in languages without an SDK
* Needing fine-grained control over request/response handling
* Debugging or testing API behavior
* Building custom streaming implementations

### Listen to an Example

<AudioPlayer src="/audio/demo-accent-en-us.wav" title="Direct API Output" prompt="Welcome to our service. We're glad to have you here." badge="en-us" />

### Prerequisites

<Steps>
  <Step title="Create an account">
    Sign up at [CAMB.AI Studio](https://studio.camb.ai) if you haven't already.
  </Step>

  <Step title="Get your API key">
    Go to **Settings → API Keys** in Studio and copy your key. See [Authentication](/getting-started/authentication) for details.
  </Step>
</Steps>

***

## Basic TTS Request

`POST /tts-stream` returns a **binary audio byte stream** (for example `audio/wav` or `audio/mpeg`), not Server-Sent Events or JSON chunks. The server sends the `Content-Type` that matches your `output_configuration.format`. You can buffer the full body for short clips, or read in chunks for lower latency—see [Stream Text-to-Speech Audio](/api-reference/endpoint/create-tts-stream).

<CodeGroup>
  ```bash cURL theme={null}
  curl -X POST "https://client.camb.ai/apis/tts-stream" \
    -H "x-api-key: YOUR_API_KEY" \
    -H "Content-Type: application/json" \
    -d '{
      "text": "Hello from the command line!",
      "voice_id": 147320,
      "language": "en-us",
      "speech_model": "mars-8.1-flash-beta",
      "output_configuration": {
        "format": "wav"
      }
    }' \
    --output output.wav
  ```

  ```python Python (requests) theme={null}
  import os
  import requests

  def text_to_speech(text: str, voice_id: int = 147320) -> bytes:
      """Synchronous TTS call: stream the response and concatenate the chunks.

      Args:
          text: Text to synthesize.
          voice_id: Voice profile ID to speak with.

      Returns:
          The complete audio payload (WAV, as requested in ``output_configuration``).

      Raises:
          RuntimeError: If the ``CAMB_API_KEY`` environment variable is not set.
          requests.HTTPError: If the API answers with a non-success status.
          requests.Timeout: If connecting or reading exceeds the timeout.
      """
      api_key = os.getenv("CAMB_API_KEY")
      if not api_key:
          # Fail fast with a clear message instead of sending a request without a usable key.
          raise RuntimeError("CAMB_API_KEY environment variable is not set")

      url = "https://client.camb.ai/apis/tts-stream"

      headers = {
          "x-api-key": api_key,
          "Content-Type": "application/json",
      }

      payload = {
          "text": text,
          "voice_id": voice_id,
          "language": "en-us",
          "speech_model": "mars-8.1-flash-beta",
          "output_configuration": {"format": "wav"},
      }

      # requests never times out by default; (connect, read) limits keep a stalled
      # connection from hanging the caller forever.
      with requests.post(
          url, headers=headers, json=payload, stream=True, timeout=(10, 120)
      ) as response:
          # Check the status before touching the body: error responses are not audio.
          response.raise_for_status()
          return b"".join(
              chunk for chunk in response.iter_content(chunk_size=1024) if chunk
          )


  if __name__ == "__main__":
      audio = text_to_speech("Hello world!")
      with open("output.wav", "wb") as f:
          f.write(audio)
  ```

  ```python Python (aiohttp) theme={null}
  import asyncio
  import os
  import aiohttp

  async def text_to_speech(text: str, voice_id: int = 147320) -> bytes:
      """Convert text to speech with a direct API call and return the full audio body.

      Args:
          text: Text to synthesize.
          voice_id: Voice profile ID to speak with.

      Returns:
          The complete audio payload (WAV, as requested in ``output_configuration``).

      Raises:
          RuntimeError: If the ``CAMB_API_KEY`` environment variable is not set.
          aiohttp.ClientResponseError: If the API answers with a non-success status.
      """
      api_key = os.getenv("CAMB_API_KEY")
      if not api_key:
          # Fail fast with a clear message instead of building headers around None.
          raise RuntimeError("CAMB_API_KEY environment variable is not set")

      url = "https://client.camb.ai/apis/tts-stream"

      headers = {
          "x-api-key": api_key,
          "Content-Type": "application/json",
      }

      payload = {
          "text": text,
          "voice_id": voice_id,
          "language": "en-us",
          "speech_model": "mars-8.1-flash-beta",
          "output_configuration": {
              "format": "wav"
          },
      }

      # Upper bound for the whole request, including reading the body.
      timeout = aiohttp.ClientTimeout(total=120)

      async with aiohttp.ClientSession(timeout=timeout) as session:
          async with session.post(url, headers=headers, json=payload) as resp:
              # Check the status before reading: error responses are not audio.
              resp.raise_for_status()
              return await resp.read()


  async def main():
      """Synthesize a sample sentence and save the result as ``output.wav``."""
      wav_bytes = await text_to_speech("Hello from the direct API!")
      with open("output.wav", "wb") as out_file:
          out_file.write(wav_bytes)
      byte_count = len(wav_bytes)
      print(f"Saved {byte_count} bytes to output.wav")


  if __name__ == "__main__":
      asyncio.run(main())
  ```
</CodeGroup>

***

## Streaming Response

For real-time playback, iterate over the **raw response body** as chunks arrive. Always validate the status **before** reading the stream—non-success responses may return JSON (for example validation errors), not audio.

Responses can include the `X-Credits-Required` header for usage tracking (see the [API reference](/api-reference/endpoint/create-tts-stream)).

```python theme={null}
import asyncio
import os
import aiohttp


async def stream_tts(text: str, voice_id: int = 147320):
    """Yield audio chunks as they arrive from the streaming endpoint.

    Args:
        text: Text to synthesize.
        voice_id: Voice profile ID to speak with.

    Yields:
        Raw audio bytes, in chunks of up to 4096 bytes.

    Raises:
        RuntimeError: If the ``CAMB_API_KEY`` environment variable is not set.
        aiohttp.ClientResponseError: If the API answers with a non-success status.
    """
    api_key = os.getenv("CAMB_API_KEY")
    if not api_key:
        # Fail fast with a clear message instead of building headers around None.
        raise RuntimeError("CAMB_API_KEY environment variable is not set")

    url = "https://client.camb.ai/apis/tts-stream"

    headers = {
        "x-api-key": api_key,
        "Content-Type": "application/json",
    }

    payload = {
        "text": text,
        "voice_id": voice_id,
        "language": "en-us",
        "speech_model": "mars-8.1-flash-beta",
        "output_configuration": {"format": "wav"},
    }

    # Upper bound for the whole request, including the time spent streaming the body.
    timeout = aiohttp.ClientTimeout(total=120)

    async with aiohttp.ClientSession(timeout=timeout) as session:
        async with session.post(url, headers=headers, json=payload) as resp:
            # Log response metadata first so it is visible even when the status check fails.
            print(f"Status: {resp.status}")
            print(f"Content-Type: {resp.headers.get('Content-Type')}")
            print(f"X-Credits-Required: {resp.headers.get('X-Credits-Required')}")
            # Validate before streaming: error responses are not audio.
            resp.raise_for_status()

            async for chunk in resp.content.iter_chunked(4096):
                yield chunk


async def main():
    """Write the streamed audio to ``streamed_output.wav`` one chunk at a time."""
    sentence = "This is streamed audio generation."
    with open("streamed_output.wav", "wb") as out_file:
        async for audio_chunk in stream_tts(sentence):
            out_file.write(audio_chunk)


if __name__ == "__main__":
    asyncio.run(main())
```

***

## Request Parameters

These fields match [Stream Text-to-Speech Audio](/api-reference/endpoint/create-tts-stream) and the OpenAPI schema for `POST /tts-stream`.

### Required Parameters

| Parameter  | Type    | Description                                                                                                               |
| ---------- | ------- | ------------------------------------------------------------------------------------------------------------------------- |
| `text`     | string  | Text to synthesize (**3–3000** characters)                                                                                |
| `language` | string  | BCP-47 locale (e.g. `en-us`). Case-sensitive lowercase. Unsupported locales for the chosen `speech_model` return **422**. |
| `voice_id` | integer | Voice profile ID from [`/list-voices`](/api-reference/endpoint/list-voices)                                               |

### Optional Parameters

| Parameter                              | Type    | Default                  | Description                                                                                                                                                                                                                                                                               |
| -------------------------------------- | ------- | ------------------------ | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `speech_model`                         | string  | `mars-8.1-flash-beta`    | `mars-8.1-flash-beta`, `mars-8.1-pro-beta`, `mars-flash`, `mars-pro`, `mars-instruct`. MARS 8.1 beta models support inline pronunciation and non-verbal tags in `text`; `mars-instruct` uses a different expressive tag set ([API reference](/api-reference/endpoint/create-tts-stream)). |
| `output_configuration`                 | object  | format defaults to `wav` | `format`, optional `sample_rate`. Supported formats depend on `speech_model` (see the **Output format support by model** table in the API reference).                                                                                                                                     |
| `voice_settings`                       | object  | —                        | `speaking_rate`, reference quality, accent controls ([API reference](/api-reference/endpoint/create-tts-stream)).                                                                                                                                                                         |
| `inference_options`                    | object  | —                        | e.g. `inference_steps` where applicable ([API reference](/api-reference/endpoint/create-tts-stream)).                                                                                                                                                                                     |
| `enhance_named_entities_pronunciation` | boolean | `false`                  | Improves named-entity pronunciation when supported. **Not supported** for `mars-8.1-flash-beta` or `mars-8.1-pro-beta` (same as the API reference note).                                                                                                                                  |

More details are available in the [API Reference](/api-reference/endpoint/create-tts-stream).

`mars-instruct` does not support `mp3` or `pcm_s16be` (see **Output format support by model** in the API reference).

***

## Expressive and Pronunciation Controls

* **`mars-8.1-flash-beta` / `mars-8.1-pro-beta`**: English CMU phoneme overrides (e.g. `[B EY1 S]`) and non-verbal tags such as `[laughter]`—see **MARS 8.1 Beta Text Controls** in the [API reference](/api-reference/endpoint/create-tts-stream).
* **`mars-instruct`**: Emotion and pacing tags and SSML-style pauses in `text`—examples below.

When you use `speech_model: "mars-instruct"`, you can encode expression directly in the `text` field.

English examples:

* `[speaking slowly] This is very important. Please pay close attention.`
* `[excited] We shipped the feature, and the response has been fantastic!`
* `Let's pause for a moment <break time="400ms"/> and continue clearly.`

<Note>
  For a comprehensive guide on emotional expression, pauses, and prosody control, see the [Emotional Voice Control tutorial](/tutorials/emotional-voice-control).
</Note>

***

## Listing Voices

Get available voices:

<CodeGroup>
  ```python Python theme={null}
  import asyncio
  import os
  import aiohttp


  async def list_voices():
      """List all available voices.

      Returns:
          The decoded JSON body of the ``/list-voices`` response.

      Raises:
          RuntimeError: If the ``CAMB_API_KEY`` environment variable is not set.
          aiohttp.ClientResponseError: If the API answers with an error status.
      """
      api_key = os.getenv("CAMB_API_KEY")
      if not api_key:
          # Fail fast with a clear message instead of building headers around None.
          raise RuntimeError("CAMB_API_KEY environment variable is not set")

      url = "https://client.camb.ai/apis/list-voices"

      headers = {"x-api-key": api_key}

      async with aiohttp.ClientSession() as session:
          async with session.get(url, headers=headers) as resp:
              # Same status handling as the TTS examples; the raised error carries
              # the status code and request details instead of a bare message.
              resp.raise_for_status()
              return await resp.json()


  async def main():
      """Fetch the voice list and print a one-line summary of the first ten entries."""
      voices = await list_voices()
      total = len(voices)
      print(f"Found {total} voices:\n")

      preview = voices[:10]  # Keep the output short
      for voice in preview:
          print(f"ID: {voice['id']}, Name: {voice['voice_name']}, Gender: {voice['gender']}")


  if __name__ == "__main__":
      asyncio.run(main())
  ```

  ```bash cURL theme={null}
  curl -X GET "https://client.camb.ai/apis/list-voices" \
    -H "x-api-key: YOUR_API_KEY"
  ```
</CodeGroup>

***

## Playing Audio

<CodeGroup>
  ```python Using sounddevice theme={null}
  import asyncio
  import io
  import os
  import wave
  import aiohttp
  import numpy as np
  import sounddevice as sd


  async def play_tts(text: str):
      """Generate TTS audio for ``text`` and play it on the default output device.

      Args:
          text: Text to synthesize.

      Raises:
          RuntimeError: If the ``CAMB_API_KEY`` environment variable is not set.
          aiohttp.ClientResponseError: If the API answers with a non-success status.
          ValueError: If the returned WAV does not hold 16-bit samples.
      """
      api_key = os.getenv("CAMB_API_KEY")
      if not api_key:
          # Fail fast with a clear message instead of building headers around None.
          raise RuntimeError("CAMB_API_KEY environment variable is not set")

      url = "https://client.camb.ai/apis/tts-stream"

      headers = {
          "x-api-key": api_key,
          "Content-Type": "application/json",
      }

      payload = {
          "text": text,
          "voice_id": 147320,
          "language": "en-us",
          "speech_model": "mars-8.1-flash-beta",
          "output_configuration": {"format": "wav"},
      }

      timeout = aiohttp.ClientTimeout(total=120)

      async with aiohttp.ClientSession(timeout=timeout) as session:
          async with session.post(url, headers=headers, json=payload) as resp:
              # Check the status before reading: error responses are not audio.
              resp.raise_for_status()
              audio_bytes = await resp.read()

      # Parse the WAV container and take its layout from the header instead of assuming it
      with wave.open(io.BytesIO(audio_bytes), 'rb') as wav_file:
          sample_rate = wav_file.getframerate()
          channels = wav_file.getnchannels()
          sample_width = wav_file.getsampwidth()
          frames = wav_file.readframes(-1)

      if sample_width != 2:
          # Decoding other widths as int16 would play noise rather than speech.
          raise ValueError(f"Expected 16-bit PCM WAV, got {8 * sample_width}-bit samples")

      # One row per frame, one column per channel, so multi-channel audio is not
      # flattened into a single mono stream.
      audio_data = np.frombuffer(frames, dtype=np.int16).reshape(-1, channels)

      sd.play(audio_data, samplerate=sample_rate)
      sd.wait()  # Block until playback has finished


  if __name__ == "__main__":
      asyncio.run(play_tts("Hello! This audio is playing directly."))
  ```

  ```python Converting raw PCM to WAV theme={null}
  import wave

  def save_as_wav(pcm_data: bytes, filename: str, sample_rate: int = 22050):
      """Wrap raw 16-bit mono PCM bytes in a WAV container and write it to ``filename``.

      Use this only when ``output_configuration.format`` is a raw ``pcm_*`` format.

      Args:
          pcm_data: Raw PCM16 mono sample bytes.
          filename: Path of the WAV file to create.
          sample_rate: Sample rate of ``pcm_data`` in Hz.
      """
      mono = 1              # channel count
      bytes_per_sample = 2  # 16-bit samples
      with wave.open(filename, "wb") as writer:
          # (nchannels, sampwidth, framerate, nframes, comptype, compname);
          # nframes starts at 0 and is filled in from the data that gets written.
          writer.setparams(
              (mono, bytes_per_sample, sample_rate, 0, "NONE", "not compressed")
          )
          writer.writeframes(pcm_data)

  # Example: wrap PCM from a `pcm_*` stream after you have the full byte buffer
  # (If you use format "wav", the API already returns a WAV container—open it with wave.open(io.BytesIO(...)) instead.)
  if __name__ == "__main__":
      pcm_bytes = b"..."  # your raw PCM payload
      save_as_wav(pcm_bytes, "output.wav", sample_rate=22050)
  ```
</CodeGroup>

***

## Next Steps

<CardGroup cols={2}>
  <Card title="Python SDK" icon="python" href="/sdk-guides/python-sdk">
    Use the SDK for simpler integration
  </Card>

  <Card title="API Reference" icon="code" href="/api-reference/endpoint/create-tts-stream">
    Complete API documentation
  </Card>

  <Card title="Voice Agents" icon="https://mintcdn.com/cambai/2LvnefIkletroPxv/images/pipecat-orange.svg?fit=max&auto=format&n=2LvnefIkletroPxv&q=85&s=40cf8e001b8cadc8a4c3c557dea603d5" href="/integrations/pipecat" width="24" height="24" data-path="images/pipecat-orange.svg">
    Build real-time voice applications
  </Card>

  <Card title="Voice Library" icon="speech" href="/api-reference/endpoint/list-voices">
    Browse available voices
  </Card>
</CardGroup>
