core

Core data structures for audio transcription

source

AudioData

 AudioData (samples:numpy.ndarray, sample_rate:int, duration:float,
            filepath:Optional[pathlib.Path]=None,
            metadata:Dict[str,Any]=<factory>)

Container for audio data and metadata.


source

TranscriptionResult

 TranscriptionResult (text:str, confidence:Optional[float]=None,
                      segments:Optional[List[Dict]]=<factory>,
                      metadata:Optional[Dict]=<factory>)

Standardized transcription output.

Testing the dataclasses

# Smoke-test AudioData: construct one instance, then show that the metadata
# dict it was created with can be mutated in place.
import numpy as np

audio = AudioData(
    filepath=Path("/tmp/test.wav"),
    duration=1.5,
    sample_rate=16000,
    samples=np.array([0.1, 0.2, 0.3]),
)

# The f-string is rendered before the mutation below, so it reports the
# metadata exactly as it was right after construction.
print(
    "AudioData instance:",
    audio,
    f"\nMetadata: {audio.metadata}",
    sep="\n",
)
audio.metadata.update({'format': 'wav'})
print(f"Updated metadata: {audio.metadata}")
AudioData instance:
AudioData(samples=array([0.1, 0.2, 0.3]), sample_rate=16000, duration=1.5, filepath=Path('/tmp/test.wav'), metadata={})

Metadata: {}
Updated metadata: {'format': 'wav'}
# Smoke-test TranscriptionResult with text, confidence and segments supplied;
# metadata is deliberately omitted so its default is what gets printed.
result = TranscriptionResult(
    segments=[
        {"start": 0.0, "end": 0.5, "text": "Hello"},
        {"start": 0.5, "end": 1.0, "text": "world"},
    ],
    confidence=0.95,
    text="Hello world",
)

# One print call, one item per output line: the full repr first, then each
# field on its own line.
print(
    "TranscriptionResult instance:",
    result,
    f"\nText: {result.text}",
    f"Confidence: {result.confidence}",
    f"Segments: {result.segments}",
    f"Metadata: {result.metadata}",
    sep="\n",
)
TranscriptionResult instance:
TranscriptionResult(text='Hello world', confidence=0.95, segments=[{'start': 0.0, 'end': 0.5, 'text': 'Hello'}, {'start': 0.5, 'end': 1.0, 'text': 'world'}], metadata={})

Text: Hello world
Confidence: 0.95
Segments: [{'start': 0.0, 'end': 0.5, 'text': 'Hello'}, {'start': 0.5, 'end': 1.0, 'text': 'world'}]
Metadata: {}
# Build a result from text alone to show what the omitted fields default to.
result_minimal = TranscriptionResult(text="Just text")
print(
    "\n".join(
        [
            "\nMinimal TranscriptionResult:",
            f"Text: {result_minimal.text}",
            f"Confidence: {result_minimal.confidence}",
            f"Segments: {result_minimal.segments}",
            f"Metadata: {result_minimal.metadata}",
        ]
    )
)

# A second instance built from the same arguments is compared with the first
# using the equality that the dataclass provides automatically.
result_copy = TranscriptionResult(text="Just text")
print(f"\nEquality test: {result_minimal == result_copy}")

Minimal TranscriptionResult:
Text: Just text
Confidence: None
Segments: []
Metadata: {}

Equality test: True