Skip to content

JwtCache + Streaming STT

Wire an InworldJwtCache into InworldClient so the realtime (WebSocket) streaming path pulls a fresh JWT from the cache on every connect. This keeps long-lived backends correct across token rotations without reconstructing the client.

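This example assumes that `using Inworld;` is in scope and that your Inworld credentials are available through the `INWORLD_API_KEY` environment variable (or through `INWORLD_JWT_KEY` together with `INWORLD_JWT_SECRET`).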

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
// Resolve credentials. A non-empty INWORLD_JWT_KEY / INWORLD_JWT_SECRET is used as-is;
// otherwise the matching half of the pair decoded from INWORLD_API_KEY is used.
var apiKey = Environment.GetEnvironmentVariable("INWORLD_JWT_KEY") switch
{
    { Length: > 0 } explicitKey => explicitKey,
    _ => DecodeKeyPair(Environment.GetEnvironmentVariable("INWORLD_API_KEY")).key,
};
var apiSecret = Environment.GetEnvironmentVariable("INWORLD_JWT_SECRET") switch
{
    { Length: > 0 } explicitSecret => explicitSecret,
    _ => DecodeKeyPair(Environment.GetEnvironmentVariable("INWORLD_API_KEY")).secret,
};

// Both halves are needed to build the cache; stop here when either one is missing.
if (string.IsNullOrEmpty(apiKey) || string.IsNullOrEmpty(apiSecret))
{
    throw new AssertInconclusiveException(
        "INWORLD_API_KEY (or INWORLD_JWT_KEY + INWORLD_JWT_SECRET) is required.");
}

// A single cache instance serves the whole example; a production backend
// would keep it as a singleton.
using InworldJwtCache jwtCache = new(apiKey, apiSecret);

// The client is created straight from the cache. REST calls use the token
// that was active at construction, while every streaming connect asks
// RealtimeTokenProvider for a fresh JWT.
using InworldClient client = new(jwtCache);

// Call the provider hook once to show that it is wired up.
var preview = await client.RealtimeTokenProvider!.Invoke(CancellationToken.None);

// Generate a short spoken phrase so the STT pass has audio to transcribe.
const string phrase = "JWT cache is wired into streaming.";

// Request Linear16 output at a 16000 Hz sample rate.
var linear16Config = new AudioConfig
{
    AudioEncoding = AudioEncoding.Linear16,
    SampleRateHertz = 16000,
};

var tts = await client.TextToSpeech.SynthesizeSpeechAsync(
    text: phrase,
    voiceId: "Dennis",
    modelId: "inworld-tts-1.5-max",
    audioConfig: linear16Config);

// StripWavHeader is defined elsewhere; presumably it drops the WAV container
// header and returns the raw samples — confirm against its definition.
var audio = StripWavHeader(tts.AudioContent!);

// Feed the audio back through the MEAI speech-to-text abstraction. The
// WebSocket connect pulls a fresh token from the cache under the hood.
Meai.ISpeechToTextClient speech = client;
using MemoryStream audioStream = new(audio);

Meai.SpeechToTextOptions sttOptions = new()
{
    ModelId = "assemblyai/universal-streaming-multilingual",
    SpeechLanguage = "en-US",
};

System.Text.StringBuilder transcriptBuilder = new();
await foreach (var update in speech.GetStreamingTextAsync(audioStream, sttOptions, CancellationToken.None))
{
    var kind = update.Kind;

    // Stop consuming as soon as the session reports that it has closed.
    if (kind == Meai.SpeechToTextResponseUpdateKind.SessionClose)
    {
        break;
    }

    // Only TextUpdated updates contribute to the transcript; every other kind is ignored.
    if (kind == Meai.SpeechToTextResponseUpdateKind.TextUpdated)
    {
        transcriptBuilder.Append(update.Text);
    }
}

    because: "streaming STT via JwtCache-backed auth should still return a transcript");