Examples

Enhance a WAV file

# The license key is passed through the AIC_SDK_LICENSE environment variable,
# set here for this single command only; replace `your_key_here` with your key.
AIC_SDK_LICENSE=your_key_here python examples/enhance.py input.wav output.wav --strength 80

Streaming-like chunked processing

import os

import numpy as np
from aic import Model, AICModelType

# Read the license key from the same environment variable that the
# command-line example on this page uses. Indexing `os.environ` fails fast
# with a KeyError if the variable is not set.
license_key = os.environ["AIC_SDK_LICENSE"]

# A single constant for the chunk size, so the model configuration and the
# reads from the stream cannot drift apart (480 frames at 48 kHz = 10 ms).
FRAMES_PER_CHUNK = 480

with Model(
    AICModelType.QUAIL_S,
    license_key=license_key,
    sample_rate=48000,
    channels=1,
    frames=FRAMES_PER_CHUNK,
) as model:
    # Placeholder: substitute your own audio source. This example assumes it
    # exposes `has_data()` and `get_chunk(n)`.
    audio_stream = ...  # your audio input
    while audio_stream.has_data():
        chunk = audio_stream.get_chunk(FRAMES_PER_CHUNK)
        enhanced = model.process(chunk)
        # play or store `enhanced`

Voice Activity Detection (VAD) during streaming

Attach a VAD to a model and query speech activity as you process audio.

import os

import numpy as np
from aic import Model, AICModelType, AICVadParameter

# Read the license key from the AIC_SDK_LICENSE environment variable.
# Indexing `os.environ` fails fast with a KeyError if the variable is not set.
license_key = os.environ["AIC_SDK_LICENSE"]

with Model(
    AICModelType.QUAIL_L,
    license_key=license_key,
    sample_rate=48000,
    channels=1,
    frames=480,
) as model:
    with model.create_vad() as vad:
        # Optional: tune VAD behavior
        vad.set_parameter(AICVadParameter.SPEECH_HOLD_DURATION, 0.05)
        vad.set_parameter(AICVadParameter.SENSITIVITY, 6.0)

        # `stream_chunks` and `handle_active_speech` are placeholders for
        # your own audio source and speech handler.
        for chunk in stream_chunks():  # yields (1, 480) float32 arrays
            model.process(chunk)
            # Query the VAD after each processed chunk.
            if vad.is_speech_detected():
                handle_active_speech(chunk)

Sequential channel processing

Process audio where channels are stored sequentially (all samples for channel 0, then channel 1, etc.) rather than interleaved.

import os

import numpy as np
from aic import Model, AICModelType

# `license_key` must be defined before it is passed to `Model`; read it from
# the AIC_SDK_LICENSE environment variable (KeyError if it is not set).
license_key = os.environ["AIC_SDK_LICENSE"]

# Named once so the test data and the model configuration always agree.
CHANNELS = 2
FRAMES = 480

# Sequential layout: [ch0_samples..., ch1_samples..., ...]
ch0 = np.random.randn(FRAMES).astype(np.float32)
ch1 = np.random.randn(FRAMES).astype(np.float32)
audio_sequential = np.concatenate([ch0, ch1])  # All ch0, then all ch1

with Model(
    AICModelType.QUAIL_L,
    license_key=license_key,
    sample_rate=48000,
    channels=CHANNELS,
    frames=FRAMES,
) as model:
    enhanced = model.process_sequential(audio_sequential, channels=CHANNELS)
    # `enhanced` holds the processed audio. Processing is in-place;
    # NOTE(review): presumably `audio_sequential` itself is overwritten and
    # `enhanced` refers to the same buffer - confirm against the API reference.

STT-optimized models

Use STT-optimized models for speech-to-text (STT) applications. These models are designed to improve STT accuracy in challenging environments.

import os

import numpy as np
from aic import Model, AICModelType

# `license_key` must be defined before it is passed to `Model`; read it from
# the AIC_SDK_LICENSE environment variable (KeyError if it is not set).
license_key = os.environ["AIC_SDK_LICENSE"]

# For 16 kHz audio (recommended for most STT systems)
with Model(
    AICModelType.QUAIL_STT_L16,
    license_key=license_key,
    sample_rate=16000,
    channels=1,
    frames=160,
) as model:
    audio = np.random.randn(1, 160).astype(np.float32)  # (channels, frames)
    enhanced = model.process(audio)

# For 8 kHz audio
with Model(
    AICModelType.QUAIL_STT_L8,
    license_key=license_key,
    sample_rate=8000,
    channels=1,
    frames=80,
) as model:
    audio = np.random.randn(1, 80).astype(np.float32)  # (channels, frames)
    enhanced = model.process(audio)

# Voice Focus model - isolates foreground speaker while suppressing interfering speech
with Model(
    AICModelType.QUAIL_VF_STT_L16,
    license_key=license_key,
    sample_rate=16000,
    channels=1,
    frames=160,
) as model:
    audio = np.random.randn(1, 160).astype(np.float32)  # (channels, frames)
    enhanced = model.process(audio)