Core Data Structures

DTOs for audio transcription with FileBackedDTO support for zero-copy transfer

source

AudioData


def AudioData(
    samples: ndarray, sample_rate: int
) -> None:

Container for raw audio data. Implements FileBackedDTO for zero-copy transfer between Host and Worker processes.


source

TranscriptionResult


def TranscriptionResult(
    text: str, confidence: Optional[float] = None, segments: Optional[List[Dict]] = None, metadata: Dict = <factory>
) -> None:

Standardized output for all transcription plugins.

Testing AudioData

AudioData implements the FileBackedDTO protocol, which means the RemotePluginProxy will automatically serialize it to a temp file before sending to the Worker.

# --- AudioData round-trip check ---
import os

# One second of a 440 Hz sine sampled at 16 kHz.
tone = np.sin(np.linspace(0, 2 * np.pi * 440, 16000))
audio = AudioData(samples=tone, sample_rate=16000)
print(f"AudioData: {len(audio.samples)} samples at {audio.sample_rate}Hz")

# The proxy relies on structural typing: AudioData must satisfy FileBackedDTO.
print(f"\nImplements FileBackedDTO: {isinstance(audio, FileBackedDTO)}")

# Serialize exactly the way RemotePluginProxy would before handing off to a Worker.
temp_path = audio.to_temp_file()
print(f"Saved to temp file: {temp_path}")

# Confirm the serialized file landed on disk and is non-trivial in size.
print(f"File exists: {os.path.exists(temp_path)}")
print(f"File size: {os.path.getsize(temp_path)} bytes")

# Remove the temp artifact so repeated runs stay clean.
os.unlink(temp_path)
AudioData: 16000 samples at 16000Hz

Implements FileBackedDTO: True
Saved to temp file: /tmp/tmpadmpzdp4.wav
File exists: True
File size: 32044 bytes
# --- TranscriptionResult with every field populated ---
word_timings = [
    {"start": 0.0, "end": 0.5, "text": "Hello"},
    {"start": 0.5, "end": 1.0, "text": "world"},
]
run_info = {"model": "whisper-large-v3", "language": "en"}

result = TranscriptionResult(
    text="Hello world",
    confidence=0.95,
    segments=word_timings,
    metadata=run_info,
)

# Echo each field back to verify nothing was dropped or coerced.
print(f"Text: {result.text}")
print(f"Confidence: {result.confidence}")
print(f"Segments: {result.segments}")
print(f"Metadata: {result.metadata}")
Text: Hello world
Confidence: 0.95
Segments: [{'start': 0.0, 'end': 0.5, 'text': 'Hello'}, {'start': 0.5, 'end': 1.0, 'text': 'world'}]
Metadata: {'model': 'whisper-large-v3', 'language': 'en'}
# --- Minimal construction: text is the only required field ---
minimal = TranscriptionResult(text="Just the text")
print(f"Minimal result: {minimal}")

# AudioData.from_file would load from disk; left commented — requires a real audio file.
# audio_loaded = AudioData.from_file("path/to/audio.wav")
Minimal result: TranscriptionResult(text='Just the text', confidence=None, segments=None, metadata={})