from pathlib import Path
from typing import Any, Dict, List, Optional, Union

from cjm_media_plugin_system.core import TimeRange
class ExampleVADPlugin(MediaAnalysisPlugin):
    """Example VAD (Voice Activity Detection) plugin implementation.

    Returns hard-coded speech/silence segments; exists to demonstrate the
    MediaAnalysisPlugin interface (no real audio analysis is performed).
    """

    def __init__(self):
        # Current configuration; populated by initialize().
        self._config: Dict[str, Any] = {}
        # Mock model handle (a descriptive string); created by initialize(),
        # released by cleanup().
        self._model = None

    @property
    def name(self) -> str:
        """Unique plugin identifier."""
        return "example-vad"

    @property
    def version(self) -> str:
        """Plugin semantic version."""
        return "1.0.0"

    def initialize(self, config: Optional[Dict[str, Any]] = None) -> None:
        """Initialize with configuration.

        Args:
            config: Optional configuration dict; falls back to
                ``{"threshold": 0.5}`` when omitted or empty.
        """
        self._config = config or {"threshold": 0.5}
        self._model = f"MockVAD-threshold-{self._config.get('threshold')}"

    def execute(
        self,
        media_path: Union[str, Path],
        **kwargs
    ) -> MediaAnalysisResult:
        """Detect voice activity segments in the audio.

        Args:
            media_path: Path to the media file (recorded in metadata only;
                this mock never reads it).
            **kwargs: Unused; accepted for interface compatibility.

        Returns:
            MediaAnalysisResult whose ``ranges`` are mocked speech/silence
            TimeRange segments and whose ``metadata`` describes the run.
        """
        # Mock VAD detection results
        ranges = [
            TimeRange(start=0.0, end=2.5, label="speech", confidence=0.95),
            TimeRange(start=2.5, end=3.0, label="silence", confidence=0.99),
            TimeRange(start=3.0, end=7.5, label="speech", confidence=0.92),
        ]
        return MediaAnalysisResult(
            ranges=ranges,
            metadata={
                "source": str(media_path),
                # Fix: report the model identifier built in initialize(),
                # not the raw threshold float — the key is named "model".
                "model": self._model,
                "total_speech": 7.0,
                "total_silence": 0.5
            }
        )

    def get_config_schema(self) -> Dict[str, Any]:
        """Return JSON Schema for configuration."""
        return {
            "type": "object",
            "properties": {
                "threshold": {
                    "type": "number",
                    "minimum": 0.0,
                    "maximum": 1.0,
                    "default": 0.5,
                    "description": "Voice activity detection threshold"
                }
            }
        }

    def get_current_config(self) -> Dict[str, Any]:
        """Return current configuration."""
        return self._config

    def cleanup(self) -> None:
        """Clean up resources (drops the mock model handle)."""
        self._model = None
Domain-specific plugin interface for media analysis (read-only / signal extraction)
MediaAnalysisPlugin
MediaAnalysisPlugin(*args, **kwargs)
Abstract base class for plugins that analyze media files.
Analysis plugins perform read-only operations that extract temporal segments from media files (VAD, scene detection, beat detection, etc.).
How It Works
Host Process Worker Process (Isolated Env)
┌─────────────────────┐ ┌─────────────────────────────┐
│ │ │ │
│ plugin.execute( │ HTTP/JSON │ MediaAnalysisPlugin │
│ media_path= │ ─────────────────▶ .execute( │
│ "/path/video.mp4" │ │ media_path="..." │
│ ) │ │ ) │
│ │ │ │
│ │ │ # Returns TimeRanges │
│ MediaAnalysisResult │ ◀───────────────│ # e.g., speech segments │
│ .ranges │ (JSON or │ # scene boundaries │
│ .metadata │ FileBackedDTO)│ # beat timestamps │
│ │ │ │
└─────────────────────┘ └─────────────────────────────┘
Analysis plugins: - Receive a file path to the media to analyze - Return MediaAnalysisResult containing detected TimeRange segments - Are read-only (do not modify the input file)
Example Implementation
A minimal analysis plugin that demonstrates the interface:
# Test the example plugin
plugin = ExampleVADPlugin()
plugin.initialize({"threshold": 0.6})
print(f"Plugin: {plugin.name} v{plugin.version}")
print(f"Config schema: {plugin.get_config_schema()}")
print(f"Current config: {plugin.get_current_config()}")
# Test execution
result = plugin.execute("/path/to/audio.wav")
print(f"\nDetected {len(result.ranges)} segments:")
for r in result.ranges:
print(f" {r.label}: {r.start}s - {r.end}s (conf: {r.confidence})")
print(f"Metadata: {result.metadata}")
# Cleanup
plugin.cleanup()
Plugin: example-vad v1.0.0
Config schema: {'type': 'object', 'properties': {'threshold': {'type': 'number', 'minimum': 0.0, 'maximum': 1.0, 'default': 0.5, 'description': 'Voice activity detection threshold'}}}
Current config: {'threshold': 0.6}
Detected 3 segments:
speech: 0.0s - 2.5s (conf: 0.95)
silence: 2.5s - 3.0s (conf: 0.99)
speech: 3.0s - 7.5s (conf: 0.92)
Metadata: {'source': '/path/to/audio.wav', 'model': 0.6, 'total_speech': 7.0, 'total_silence': 0.5}