Skip to content

Realtime Voice

Connect to the Realtime Voice Agent WebSocket API for bidirectional text/audio streaming.

This example assumes that `using Xai;` is in scope and that `apiKey` contains your xAI API key.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
// Read the API key from the environment; a missing or empty value stops the example.
var apiKey = Environment.GetEnvironmentVariable("XAI_API_KEY");
if (string.IsNullOrEmpty(apiKey))
{
    throw new AssertInconclusiveException("XAI_API_KEY environment variable is not found.");
}

// Open the WebSocket connection to the xAI Realtime API.
using var client = new XaiRealtimeClient(apiKey);
await client.ConnectAsync();

// Turn-detection settings that are sent as part of the session configuration.
var turnDetection = new TurnDetection
{
    Type = "server_vad",
    Threshold = 0.85,
    SilenceDurationMs = 500,
};

// Session configuration: voice, instructions, modalities, and turn detection.
var sessionConfig = new SessionConfig
{
    Voice = SessionConfigVoice.Eve,
    Instructions = "You are a helpful assistant. Respond briefly.",
    Modalities = ["text", "audio"],
    TurnDetection = turnDetection,
};

// Apply the configuration to the connected session.
await client.SendSessionUpdateAsync(new SessionUpdatePayload { Session = sessionConfig });

// Add a user text message to the conversation.
var userMessage = new ConversationItem
{
    Type = "message",
    Role = "user",
    Content = [new ContentPart { Type = "input_text", Text = "Say hello!" }],
};
await client.SendConversationItemCreateAsync(new ConversationItemCreatePayload { Item = userMessage });

// Request a response restricted to the text modality.
var responseConfig = new ResponseConfig
{
    Modalities = ["text"],
};
await client.SendResponseCreateAsync(new ResponseCreatePayload { Response = responseConfig });

// Receive server events until the response is complete.
// The token passed to ReceiveUpdatesAsync is cancelled after 30 seconds, which bounds
// the wait if a "response done" event never arrives.
using var cts = new CancellationTokenSource(TimeSpan.FromSeconds(30));
var receivedSessionUpdated = false;
var receivedResponseDone = false;
string? transcriptText = null;

await foreach (var serverEvent in client.ReceiveUpdatesAsync(cts.Token))
{
    if (serverEvent.IsSessionUpdated)
    {
        // Record that a "session updated" event was observed.
        receivedSessionUpdated = true;
    }
    else if (serverEvent.IsResponseOutputAudioTranscriptDelta)
    {
        // Read the delta once, append it to the running transcript, and echo it.
        var delta = serverEvent.ResponseOutputAudioTranscriptDelta?.Delta;
        transcriptText = (transcriptText ?? "") + delta;
        Console.Write(delta);
    }
    else if (serverEvent.IsResponseDone)
    {
        // The response is complete; stop receiving.
        receivedResponseDone = true;
        break;
    }
    else if (serverEvent.IsError)
    {
        // Report the error instead of silently dropping it, then keep receiving:
        // the loop still ends on "response done" or when the timeout token fires.
        // NOTE(review): the error payload's shape is not visible here; include its
        // details (e.g. message/code) in this output once confirmed against the SDK.
        Console.Error.WriteLine("The Realtime API reported an error event while waiting for the response.");
    }
}