Skip to content

Cartesia

Nuget package dotnet License: MIT Discord

Features 🔥

  • Fully generated C# SDK based on official Cartesia OpenAPI specification using AutoSDK
  • Same-day updates to support new features
  • Updated and supported automatically if there are no breaking changes
  • All modern .NET features - nullability, trimming, NativeAOT, etc.
  • Supports .NET Framework/.NET Standard 2.0

Usage

1
2
3
using Cartesia;

// Create the SDK client; the using declaration disposes it when the enclosing scope ends.
// NOTE: apiKey is assumed to hold your Cartesia API key (declared elsewhere).
using CartesiaClient client = new CartesiaClient(apiKey);

Check API Status

Check the Cartesia API status.

1
2
3
4
5
// Verify connectivity by asking the API for its status.
using var client = new CartesiaClient(apiKey);

// The API version is passed explicitly through the cartesiaVersion argument.
var version = ApiStatusGetCartesiaVersion.x20251104;
var status = await client.ApiStatus.ApiStatusGetAsync(cartesiaVersion: version);

Speech To Text Client Get Text Async

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
// Transcribe audio using the MEAI ISpeechToTextClient interface.
// Cartesia processes the audio synchronously — no polling required.
using var api = new CartesiaClient(apiKey);
ISpeechToTextClient speechClient = api;

// Download the sample audio file as a stream.
using var httpClient = new HttpClient();
await using var audioStream = await httpClient.GetStreamAsync(
    "https://cdn.openai.com/API/docs/audio/alloy.wav");

// Buffer the download into memory and rewind it before handing it to the client.
// The buffer is declared with `using` so it is disposed like the other resources here.
using var ms = new MemoryStream();
await audioStream.CopyToAsync(ms);
ms.Position = 0;

var response = await speechClient.GetTextAsync(ms);

Console.WriteLine($"Text: {response.Text}");

Speech To Text Client Get Service Metadata

1
2
3
4
5
// Retrieve metadata about the speech-to-text provider through the MEAI interface.
// A placeholder API key is used in this example.
using var cartesia = new CartesiaClient("dummy-key");
ISpeechToTextClient sttClient = cartesia;

var metadata = sttClient.GetService<SpeechToTextClientMetadata>();

Speech To Text Client Get Service Self

1
2
3
4
5
// Access the underlying CartesiaClient from the MEAI speech-to-text interface.
// A placeholder API key is used in this example.
using var cartesia = new CartesiaClient("dummy-key");
ISpeechToTextClient sttClient = cartesia;

var self = sttClient.GetService<CartesiaClient>();

Text To Speech Client Get Service Metadata

1
2
3
4
5
// Retrieve metadata about the text-to-speech provider through the MEAI interface.
// A placeholder API key is used in this example.
using var cartesia = new CartesiaClient("dummy-key");
ITextToSpeechClient ttsClient = cartesia;

var metadata = ttsClient.GetService<TextToSpeechClientMetadata>();

Text To Speech SSE Streaming

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
// Build an HttpClient on top of the custom handler, pointed at the default Cartesia base URL,
// and hand it to the SDK client together with a placeholder API key.
var handler = new TtsSseResponseHandler();
var httpClient = new HttpClient(handler)
{
    BaseAddress = new Uri(CartesiaClient.DefaultBaseUrl),
};
using var api = new CartesiaClient("dummy-key", httpClient);

// Voice to synthesize with, selected by its identifier.
var voice = new TTSRequestVoiceSpecifier
{
    Mode = TTSRequestVoiceSpecifierMode.Id,
    Id = "694f9389-aac1-45b6-b726-9d9369183238",
};

// Raw container with the PcmS16le encoding and a sample rate of 24000.
var outputFormat = new SSEOutputFormat
{
    Container = SSEOutputFormatContainer.Raw,
    Encoding = RawEncoding.PcmS16le,
    SampleRate = 24000,
};

var request = new TTSSSERequest
{
    ModelId = TTSModel.Sonic35,
    Transcript = "Hello from Cartesia.",
    Voice = voice,
    OutputFormat = outputFormat,
    ContextId = "ctx-1",
};

// StreamTtsSseAsync yields events as the SSE response is read and decodes chunk audio bytes;
// each event is collected as it arrives.
var received = new List<CartesiaTtsSseEvent>();
await foreach (var sseEvent in api.StreamTtsSseAsync(request))
{
    received.Add(sseEvent);
}

Support

Priority place for bugs: https://github.com/tryAGI/Cartesia/issues
Priority place for ideas and general questions: https://github.com/tryAGI/Cartesia/discussions
Discord: https://discord.gg/Ca2xhfBf3v

Acknowledgments

JetBrains logo

This project is supported by JetBrains through the Open Source Support Program.

CodeRabbit logo

This project is supported by CodeRabbit through the Open Source Support Program.