Skip to content

Realtime Speech To Text

Real-time speech-to-text streaming using the typed `ConnectAsync` method with query parameters.

This example assumes that `using Deepgram;` is in scope and that `apiKey` contains your Deepgram API key.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
// Read the Deepgram API key from the environment. A missing or empty value
// aborts the sample with an inconclusive result instead of a hard failure.
var apiKey = Environment.GetEnvironmentVariable("DEEPGRAM_API_KEY");
if (string.IsNullOrEmpty(apiKey))
{
    throw new AssertInconclusiveException("DEEPGRAM_API_KEY environment variable is not found.");
}

// Build the realtime ListenV1 client and hand it the API key as its token.
await using var realtimeClient = new Realtime.DeepgramListenV1RealtimeClient();
realtimeClient.AuthorizeUsingToken(apiKey);

// One cancellation source with a 30-second budget; its token is passed to the
// connect call below.
var timeout = TimeSpan.FromSeconds(30);
using var cts = new CancellationTokenSource(timeout);

// Open the connection using the typed query parameters:
// model = Nova3, interim results enabled, language = "en".
var language = Realtime.ListenV1Language.FromString("en");
await realtimeClient.ConnectAsync(
    model: Realtime.ListenV1Model.Nova3,
    interimResults: Realtime.ListenV1InterimResults.True,
    language: language,
    cancellationToken: cts.Token);

// Fetch the sample WAV file into memory; it is streamed to the socket below.
using var httpClient = new HttpClient();
var audioBytes = await httpClient.GetByteArrayAsync(
    "https://dpgr.am/spacewalk.wav", cts.Token);

// Walk the buffer in slices of at most 8 KB, sending each slice as its own
// complete binary WebSocket message.
const int chunkSize = 8192;
var sent = 0;
while (sent < audioBytes.Length)
{
    // The final slice may be shorter than chunkSize.
    var remaining = audioBytes.Length - sent;
    var count = remaining < chunkSize ? remaining : chunkSize;
    var frame = new ArraySegment<byte>(audioBytes, sent, count);

    await realtimeClient.SendAsync(
        frame,
        System.Net.WebSockets.WebSocketMessageType.Binary,
        endOfMessage: true,
        cts.Token);

    sent += count;
}

// All audio has been sent: build a CloseStream control message and send it
// so the server knows no more audio is coming.
var closeStream = new Realtime.ListenV1ControlMessage
{
    Type = Realtime.ListenV1ControlMessageType.CloseStream,
};
await realtimeClient.SendListenV1CloseStreamAsync(closeStream, cts.Token);

// Drain server events until the update stream ends (or the token cancels),
// collecting only finalized, non-empty transcripts.
List<string> transcripts = new();
string? responseId = null;

await foreach (var update in realtimeClient.ReceiveUpdatesAsync(cts.Token))
{
    // Metadata event: remember and log the request id it carries.
    // NOTE(review): confirm against the Deepgram docs whether Metadata arrives
    // at session start or only after CloseStream; the log text says "started".
    if (update.IsMetadata && update.Metadata is { } metadata)
    {
        responseId = metadata.RequestId.ToString();
        Console.WriteLine($"Session started: {responseId}");
        continue;
    }

    // Anything that is not a populated Results event is ignored.
    if (!update.IsResults || update.Results is not { } results)
    {
        continue;
    }

    // Skip interim results; only entries flagged final are kept.
    if (results.IsFinal != true)
    {
        continue;
    }

    // Use the first alternative, if the channel has any.
    var alternatives = results.Channel?.Alternatives;
    if (alternatives is not { Count: > 0 })
    {
        continue;
    }

    // Empty transcripts are dropped.
    var text = alternatives[0].Transcript;
    if (string.IsNullOrEmpty(text))
    {
        continue;
    }

    transcripts.Add(text);
    Console.WriteLine($"Final: {text}");
}

// Report how many final transcripts were collected.
Console.WriteLine($"Total final transcripts: {transcripts.Count}");