Core Data Structures

Data transfer objects (DTOs) for media analysis and processing, with FileBackedDTO support for zero-copy transfer between Host and Worker processes.

TimeRange

Represents a specific temporal segment within a media file. Used by analysis plugins to mark regions of interest (VAD segments, scene boundaries, etc.).


TimeRange


def TimeRange(
    start:float, end:float, label:str='segment', confidence:Optional=None, payload:Dict=<factory>
)->None:

Represents a temporal segment within a media file.

# Test TimeRange creation
# Every field is passed by keyword; label, confidence and payload are the
# optional ones according to the signature shown above.
segment = TimeRange(
    start=1.5,
    end=3.2,
    label="speech",
    confidence=0.95,
    payload={"speaker_id": "speaker_01"}
)

# The fields are read back as plain attributes; start/end are displayed
# with an "s" (seconds) suffix.
print(f"TimeRange: {segment.start}s - {segment.end}s")
print(f"Label: {segment.label}")
print(f"Confidence: {segment.confidence}")
print(f"Payload: {segment.payload}")
# to_dict() yields a plain dict holding the same five fields.
print(f"\nAs dict: {segment.to_dict()}")
TimeRange: 1.5s - 3.2s
Label: speech
Confidence: 0.95
Payload: {'speaker_id': 'speaker_01'}

As dict: {'start': 1.5, 'end': 3.2, 'label': 'speech', 'confidence': 0.95, 'payload': {'speaker_id': 'speaker_01'}}

MediaMetadata

Standard container for basic media file information (duration, codec, streams, etc.).


MediaMetadata


def MediaMetadata(
    path:str, duration:float, format:str, size_bytes:int, video_streams:List=<factory>, audio_streams:List=<factory>
)->None:

Container for media file metadata.

# Test MediaMetadata creation
# In this example each stream is described by a plain dict of codec
# properties, one dict per stream.
metadata = MediaMetadata(
    path="/path/to/video.mp4",
    duration=120.5,
    format="mp4",
    size_bytes=15_000_000,
    video_streams=[{"codec": "h264", "width": 1920, "height": 1080, "fps": 30}],
    audio_streams=[{"codec": "aac", "sample_rate": 48000, "channels": 2}]
)

print(f"File: {metadata.path}")
print(f"Duration: {metadata.duration}s")
print(f"Format: {metadata.format}")
# Dividing by 1_000_000 reports the size in decimal megabytes (not MiB),
# rounded to two decimal places.
print(f"Size: {metadata.size_bytes / 1_000_000:.2f} MB")
print(f"Video streams: {metadata.video_streams}")
print(f"Audio streams: {metadata.audio_streams}")
File: /path/to/video.mp4
Duration: 120.5s
Format: mp4
Size: 15.00 MB
Video streams: [{'codec': 'h264', 'width': 1920, 'height': 1080, 'fps': 30}]
Audio streams: [{'codec': 'aac', 'sample_rate': 48000, 'channels': 2}]

MediaAnalysisResult

Standard output for media analysis plugins. Implements FileBackedDTO for zero-copy transfer between Host and Worker processes.


MediaAnalysisResult


def MediaAnalysisResult(
    ranges:List, metadata:Dict=<factory>
)->None:

Standard output for media analysis plugins.

# Test MediaAnalysisResult creation
import os

# Three contiguous segments plus free-form, plugin-specific metadata.
result = MediaAnalysisResult(
    ranges=[
        TimeRange(start=0.0, end=2.5, label="speech", confidence=0.98),
        TimeRange(start=2.5, end=4.0, label="silence", confidence=0.99),
        TimeRange(start=4.0, end=8.5, label="speech", confidence=0.95),
    ],
    metadata={"total_speech": 7.0, "total_silence": 1.5, "model": "silero-vad"}
)

print(f"Number of segments: {len(result.ranges)}")
for r in result.ranges:
    print(f"  {r.label}: {r.start}s - {r.end}s (conf: {r.confidence})")
print(f"Metadata: {result.metadata}")

# Test FileBackedDTO protocol
print(f"Implements FileBackedDTO: {isinstance(result, FileBackedDTO)}")

# Test to_temp_file (this is what the Proxy calls)
temp_path = result.to_temp_file()
try:
    print(f"Saved to temp file: {temp_path}")

    # Verify the file exists
    print(f"File exists: {os.path.exists(temp_path)}")
    print(f"File size: {os.path.getsize(temp_path)} bytes")

    # Test from_file (round-trip)
    loaded = MediaAnalysisResult.from_file(temp_path)
    print(f"\nLoaded {len(loaded.ranges)} ranges from file")
    print(f"Loaded metadata: {loaded.metadata}")
finally:
    # Clean up even when one of the checks above raises, so a failed
    # round-trip does not leave the temp file behind.
    os.unlink(temp_path)
Number of segments: 3
  speech: 0.0s - 2.5s (conf: 0.98)
  silence: 2.5s - 4.0s (conf: 0.99)
  speech: 4.0s - 8.5s (conf: 0.95)
Metadata: {'total_speech': 7.0, 'total_silence': 1.5, 'model': 'silero-vad'}
Implements FileBackedDTO: True
Saved to temp file: /tmp/tmphqdcgxmu.json
File exists: True
File size: 339 bytes

Loaded 3 ranges from file
Loaded metadata: {'total_speech': 7.0, 'total_silence': 1.5, 'model': 'silero-vad'}