Transcription Plugin Interface

Domain-specific plugin interface for audio transcription

source

TranscriptionPlugin


def TranscriptionPlugin(
    args:VAR_POSITIONAL, kwargs:VAR_KEYWORD
):

Abstract base class for all transcription plugins.

Extends PluginInterface with transcription-specific requirements:

- supported_formats: List of audio file extensions this plugin can handle
- execute: Accepts an audio path (str) or AudioData and returns a TranscriptionResult

NOTE: When running via RemotePluginProxy, AudioData objects are automatically serialized to temp files via FileBackedDTO, so the Worker receives a file path.

How It Works

Host Process                              Worker Process (Isolated Env)
┌─────────────────────┐                  ┌─────────────────────────────┐
│ audio = AudioData(  │                  │                             │
│   samples=np.array, │                  │  TranscriptionPlugin        │
│   sample_rate=16000 │                  │    .execute(                │
│ )                   │                  │       audio="/tmp/xyz.wav"  │
│                     │                  │    )                        │
│ plugin.execute(     │   HTTP/JSON      │                             │
│   audio=audio       │ ─────────────────▶  # audio is now a PATH      │
│ )                   │  (path string)   │  # Plugin reads from disk   │
│                     │                  │                             │
│ # Proxy detects     │                  │                             │
│ # FileBackedDTO,    │                  │                             │
│ # calls to_temp_file│                  │                             │
└─────────────────────┘                  └─────────────────────────────┘

The RemotePluginProxy automatically:

1. Detects that AudioData implements FileBackedDTO
2. Calls audio.to_temp_file() to save the audio to disk
3. Sends the file path string to the Worker
4. The Worker’s execute() then receives a path, not the AudioData object

Example Implementation

A minimal transcription plugin that demonstrates the interface:

from pathlib import Path
from typing import Any, Dict, List, Optional, Union

class ExampleTranscriptionPlugin(TranscriptionPlugin):
    """Minimal mock plugin demonstrating the TranscriptionPlugin interface.

    No real model is loaded and no audio is decoded: ``execute`` returns a
    canned ``TranscriptionResult`` that only echoes where the audio came from
    and which model name was configured.
    """

    def __init__(self):
        # Populated by initialize(); empty until then.
        self._config: Dict[str, Any] = {}
        # Placeholder for a loaded model; this example stores a label string.
        self._model: Optional[str] = None

    @property
    def name(self) -> str:
        """Identifier of this plugin."""
        return "example-transcription"

    @property
    def version(self) -> str:
        """Version string of this plugin."""
        return "1.0.0"

    @property
    def supported_formats(self) -> List[str]:
        """Audio file extensions (without a leading dot) this plugin accepts."""
        return ["wav", "mp3", "flac"]

    def initialize(self, config: Optional[Dict[str, Any]] = None) -> None:
        """Store the configuration and set up the mock model.

        Args:
            config: Configuration values. When omitted or empty, the default
                ``{"model": "base"}`` is used.
        """
        # Copy the mapping so later changes to the caller's dict cannot
        # silently alter this plugin's configuration.
        self._config = dict(config) if config else {"model": "base"}
        self._model = f"MockModel-{self._config.get('model', 'base')}"

    def execute(
        self,
        audio: Union[AudioData, str, Path],
        **kwargs: Any
    ) -> TranscriptionResult:
        """Return a mock transcription for ``audio``.

        Args:
            audio: A file path (``str`` or ``Path``) or an ``AudioData``
                object. When called through the proxy, this is a file path.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            A ``TranscriptionResult`` with fixed confidence and a single mock
            segment; ``metadata["model"]`` is the configured model name, or
            ``None`` if no model was configured.
        """
        # AudioData has no path to report, so label it instead.
        if isinstance(audio, AudioData):
            audio_path = "in-memory"
        else:
            audio_path = str(audio)

        return TranscriptionResult(
            text=f"Transcribed from {audio_path}",
            confidence=0.95,
            segments=[{"start": 0.0, "end": 1.0, "text": "Mock transcription"}],
            metadata={"model": self._config.get("model")},
        )

    def get_config_schema(self) -> Dict[str, Any]:
        """Return the JSON Schema describing the accepted configuration."""
        return {
            "type": "object",
            "properties": {
                "model": {
                    "type": "string",
                    "enum": ["tiny", "base", "small", "medium", "large"],
                    "default": "base"
                },
                "language": {
                    "type": "string",
                    "default": "en"
                }
            }
        }

    def get_current_config(self) -> Dict[str, Any]:
        """Return a copy of the current configuration.

        A copy is returned so callers cannot mutate the plugin's internal
        state through the returned dict.
        """
        return dict(self._config)

    def cleanup(self) -> None:
        """Release the mock model."""
        self._model = None
# Exercise the example plugin end to end
demo_config = {"model": "large", "language": "en"}
plugin = ExampleTranscriptionPlugin()
plugin.initialize(demo_config)

# Report the plugin's identity and configuration
print(f"Plugin: {plugin.name} v{plugin.version}")
print(f"Supported formats: {plugin.supported_formats}")
print(f"Config schema: {plugin.get_config_schema()}")
print(f"Current config: {plugin.get_current_config()}")

# Run execute() with a file path, which is what the Worker receives
worker_audio_path = "/tmp/audio.wav"
result = plugin.execute(worker_audio_path)
print(f"\nResult: {result}")

# Release the plugin's resources
plugin.cleanup()
Plugin: example-transcription v1.0.0
Supported formats: ['wav', 'mp3', 'flac']
Config schema: {'type': 'object', 'properties': {'model': {'type': 'string', 'enum': ['tiny', 'base', 'small', 'medium', 'large'], 'default': 'base'}, 'language': {'type': 'string', 'default': 'en'}}}
Current config: {'model': 'large', 'language': 'en'}

Result: TranscriptionResult(text='Transcribed from /tmp/audio.wav', confidence=0.95, segments=[{'start': 0.0, 'end': 1.0, 'text': 'Mock transcription'}], metadata={'model': 'large'})