Media Analysis Plugin Interface

Domain-specific plugin interface for media analysis (read-only / signal extraction)

MediaAnalysisPlugin


def MediaAnalysisPlugin(
    args:VAR_POSITIONAL, kwargs:VAR_KEYWORD
):

Abstract base class for plugins that analyze media files.

Analysis plugins perform read-only operations that extract temporal segments from media files (VAD, scene detection, beat detection, etc.).

How It Works

Host Process                              Worker Process (Isolated Env)
┌─────────────────────┐                  ┌─────────────────────────────┐
│                     │                  │                             │
│ plugin.execute(     │   HTTP/JSON      │  MediaAnalysisPlugin        │
│   media_path=       │ ─────────────────▶    .execute(                │
│   "/path/video.mp4" │                  │       media_path="..."      │
│ )                   │                  │    )                        │
│                     │                  │                             │
│                     │                  │  # Returns TimeRanges       │
│ MediaAnalysisResult │  ◀───────────────│  # e.g., speech segments    │
│   .ranges           │   (JSON or       │  #       scene boundaries   │
│   .metadata         │    FileBackedDTO)│  #       beat timestamps    │
│                     │                  │                             │
└─────────────────────┘                  └─────────────────────────────┘

Analysis plugins:

- Receive a file path to the media to analyze
- Return a MediaAnalysisResult containing the detected TimeRange segments
- Are read-only (they do not modify the input file)

Example Implementation

A minimal analysis plugin that demonstrates the interface:

from pathlib import Path
from typing import Any, Dict, List, Optional, Union

from cjm_media_plugin_system.core import TimeRange

class ExampleVADPlugin(MediaAnalysisPlugin):
    """Example VAD (Voice Activity Detection) plugin implementation.

    This is a mock: `execute` never opens the media file and always returns
    the same three hand-written segments. It exists to show which members a
    concrete analysis plugin provides.
    """

    def __init__(self):
        # Both are populated by initialize(); until then the plugin holds
        # an empty configuration and no model.
        self._config: Dict[str, Any] = {}
        self._model = None

    @property
    def name(self) -> str:
        """Identifier of this plugin."""
        return "example-vad"

    @property
    def version(self) -> str:
        """Version string of this plugin."""
        return "1.0.0"

    def initialize(self, config: Optional[Dict[str, Any]] = None) -> None:
        """Initialize with configuration.

        Args:
            config: Option overrides. Any option that is left out falls back
                to its default (``threshold`` = 0.5). ``None`` or an empty
                dict selects the defaults only.
        """
        # Merge into a fresh dict so that (a) a partial config still gets the
        # default threshold and (b) later mutation of the caller's dict does
        # not silently change this plugin's configuration.
        self._config = {"threshold": 0.5, **(config or {})}
        self._model = f"MockVAD-threshold-{self._config.get('threshold')}"

    def execute(
        self,
        media_path: Union[str, Path],
        **kwargs
    ) -> MediaAnalysisResult:
        """Detect voice activity segments in the audio.

        Args:
            media_path: Path of the media to analyze. It is only echoed into
                the result metadata; the mock does not read the file.
            **kwargs: Accepted and ignored.

        Returns:
            A MediaAnalysisResult holding the mock segments plus metadata
            with the source path and the summed speech/silence durations.
        """
        # Mock VAD detection results
        ranges = [
            TimeRange(start=0.0, end=2.5, label="speech", confidence=0.95),
            TimeRange(start=2.5, end=3.0, label="silence", confidence=0.99),
            TimeRange(start=3.0, end=7.5, label="speech", confidence=0.92),
        ]

        def total_duration(label: str) -> float:
            # Derive the totals from the segments so the metadata cannot
            # drift from the ranges; the 0.0 start keeps the result a float
            # even when no segment carries the label.
            return sum(
                (seg.end - seg.start for seg in ranges if seg.label == label),
                0.0,
            )

        return MediaAnalysisResult(
            ranges=ranges,
            metadata={
                "source": str(media_path),
                # NOTE(review): "model" carries the configured threshold, not
                # self._model. Kept as-is because the documented output shows
                # this value -- confirm whether the model name was intended.
                "model": self._config.get("threshold"),
                "total_speech": total_duration("speech"),
                "total_silence": total_duration("silence"),
            }
        )

    def get_config_schema(self) -> Dict[str, Any]:
        """Return JSON Schema for configuration."""
        return {
            "type": "object",
            "properties": {
                "threshold": {
                    "type": "number",
                    "minimum": 0.0,
                    "maximum": 1.0,
                    "default": 0.5,
                    "description": "Voice activity detection threshold"
                }
            }
        }

    def get_current_config(self) -> Dict[str, Any]:
        """Return current configuration (empty until initialize() is called)."""
        return self._config

    def cleanup(self) -> None:
        """Clean up resources by dropping the model reference."""
        self._model = None

# Walk the example plugin through its lifecycle: initialize, inspect,
# execute, clean up.
plugin = ExampleVADPlugin()
plugin.initialize({"threshold": 0.6})

# Identity and configuration, one report line each
print(
    f"Plugin: {plugin.name} v{plugin.version}",
    f"Config schema: {plugin.get_config_schema()}",
    f"Current config: {plugin.get_current_config()}",
    sep="\n",
)

# Run the analysis and list every returned segment
result = plugin.execute("/path/to/audio.wav")
print(f"\nDetected {len(result.ranges)} segments:")
for segment in result.ranges:
    print(
        f"  {segment.label}: {segment.start}s - {segment.end}s "
        f"(conf: {segment.confidence})"
    )
print(f"Metadata: {result.metadata}")

# Release whatever the plugin is holding
plugin.cleanup()

Plugin: example-vad v1.0.0
Config schema: {'type': 'object', 'properties': {'threshold': {'type': 'number', 'minimum': 0.0, 'maximum': 1.0, 'default': 0.5, 'description': 'Voice activity detection threshold'}}}
Current config: {'threshold': 0.6}

Detected 3 segments:
  speech: 0.0s - 2.5s (conf: 0.95)
  silence: 2.5s - 3.0s (conf: 0.99)
  speech: 3.0s - 7.5s (conf: 0.92)
Metadata: {'source': '/path/to/audio.wav', 'model': 0.6, 'total_speech': 7.0, 'total_silence': 0.5}