from typing import Any, Dict, Optional
from cjm_transcription_plugin_system.forced_alignment_core import ForcedAlignItem
class ExampleForcedAlignmentPlugin(ForcedAlignmentPlugin):
    """Example implementation showing how to create a forced alignment plugin.

    This is a mock: ``execute`` never reads the audio. It assumes a fixed
    10-second clip and hands every whitespace-separated word an equal slice.
    """

    def __init__(self):
        """Start with an empty configuration; ``initialize`` fills it in."""
        self._config: Dict[str, Any] = {}

    @property
    def name(self) -> str:
        """Name reported for this plugin."""
        return "example-forced-alignment"

    @property
    def version(self) -> str:
        """Version string reported for this plugin."""
        return "1.0.0"

    @property
    def supported_formats(self) -> List[str]:
        """Audio file extensions this plugin declares it can handle."""
        return ["wav", "mp3", "flac"]

    def initialize(self, config: Optional[Dict[str, Any]] = None) -> None:
        """Store the plugin configuration, filling in defaults.

        Args:
            config: Optional settings. Keys it provides override the
                defaults; ``None`` or an empty dict yields the defaults only.
        """
        # Merge over the defaults instead of `config or defaults`: a non-empty
        # config that omits "language" must still get the default advertised
        # by get_config_schema(). Building a new dict also keeps later
        # mutations of the caller's dict from leaking into the plugin.
        self._config = {"language": "English", **(config or {})}

    def execute(
        self,
        audio: Union[AudioData, str, Path],
        text: str,
        **kwargs
    ) -> ForcedAlignResult:
        """Mock alignment that assigns equal time to each word.

        Args:
            audio: Audio to align. Ignored by this mock.
            text: Transcript; split on whitespace to obtain the words.
            **kwargs: Accepted but not used by this mock.

        Returns:
            A ForcedAlignResult with one ForcedAlignItem per word (leading and
            trailing ``.,!?;:`` stripped from its text) and metadata holding
            the mock model name and the configured language.
        """
        words = text.split()
        duration = 10.0  # Assume 10 seconds of audio
        # max(..., 1) avoids a division by zero for an empty transcript.
        time_per_word = duration / max(len(words), 1)
        items = [
            ForcedAlignItem(
                text=word.strip('.,!?;:'),
                start_time=round(i * time_per_word, 2),
                end_time=round((i + 1) * time_per_word, 2),
            )
            for i, word in enumerate(words)
        ]
        return ForcedAlignResult(
            items=items,
            metadata={"model": "mock", "language": self._config.get("language")}
        )

    def get_config_schema(self) -> Dict[str, Any]:
        """Return a JSON-Schema-style description of the accepted config."""
        return {
            "type": "object",
            "properties": {
                "language": {"type": "string", "default": "English"}
            }
        }

    def get_current_config(self) -> Dict[str, Any]:
        """Return the configuration currently held by the plugin."""
        return self._config

    def cleanup(self) -> None:
        """Nothing to release: the mock holds no external resources."""
        pass


# Forced Alignment Plugin Interface
Domain-specific plugin interface for word-level audio-text alignment
ForcedAlignmentPlugin
def ForcedAlignmentPlugin(
    *args, **kwargs
):
Abstract base class for all forced alignment plugins.
Extends PluginInterface with forced-alignment-specific requirements:
- supported_formats: List of audio file extensions this plugin can handle
- execute: Accepts audio path and transcript text, returns ForcedAlignResult
NOTE: When running via RemotePluginProxy, AudioData objects are automatically serialized to temp files via FileBackedDTO, so the Worker receives a file path.
How It Works
Host Process                             Worker Process (Isolated Env)
+---------------------+                  +-----------------------------+
| audio = AudioData(  |                  |                             |
|   samples=np.array, |                  |  ForcedAlignmentPlugin      |
|   sample_rate=16000 |                  |    .execute(                |
| )                   |                  |      audio="/tmp/xyz.wav",  |
|                     |                  |      text="Hello world"     |
| plugin.execute(     |    HTTP/JSON     |    )                        |
|   audio=audio,      | ---------------->|                             |
|   text="Hello world"|  (path string)   |  # audio is now a PATH      |
| )                   |                  |  # text passed as-is        |
|                     |                  |  # Plugin aligns words      |
| # Proxy detects     |                  |  # Returns ForcedAlignResult|
| # FileBackedDTO,    |                  |                             |
| # calls to_temp_file|                  |                             |
+---------------------+                  +-----------------------------+
The RemotePluginProxy automatically:
1. Detects that AudioData implements FileBackedDTO
2. Calls audio.to_temp_file() to save to disk
3. Sends the file path string to the Worker
4. Worker’s execute() receives a path (not AudioData) plus the text string
Example Implementation
A minimal forced alignment plugin that demonstrates the interface:
# Smoke-test the example plugin: build it, configure it, run one alignment.
plugin = ExampleForcedAlignmentPlugin()
plugin.initialize({"language": "English"})

# Report what the plugin says about itself.
print(f"Plugin: {plugin.name} v{plugin.version}")
print(f"Supported formats: {plugin.supported_formats}")
print(f"Entry point group: {plugin.entry_point_group}")

# Align a five-word transcript against a placeholder audio path.
transcript = "Hello world how are you"
result = plugin.execute("/tmp/audio.wav", text=transcript)
aligned_items = result.items

print(f"\nResult: {len(aligned_items)} items")
for item in aligned_items:
    print(f" {item}")
print(f"Metadata: {result.metadata}")

# One item per word, in transcript order, tagged with the configured language.
assert len(aligned_items) == 5
assert aligned_items[0].text == "Hello"
assert result.metadata["language"] == "English"
plugin.cleanup()
Plugin: example-forced-alignment v1.0.0
Supported formats: ['wav', 'mp3', 'flac']
Entry point group: transcription.forced_alignment_plugins
Result: 5 items
ForcedAlignItem(text='Hello', start_time=0.0, end_time=2.0)
ForcedAlignItem(text='world', start_time=2.0, end_time=4.0)
ForcedAlignItem(text='how', start_time=4.0, end_time=6.0)
ForcedAlignItem(text='are', start_time=6.0, end_time=8.0)
ForcedAlignItem(text='you', start_time=8.0, end_time=10.0)
Metadata: {'model': 'mock', 'language': 'English'}