cjm-transcription-plugin-whisper
OpenAI Whisper plugin for the cjm-transcription-plugin-system library - provides local speech-to-text transcription with configurable model selection and parameter control.
Install
pip install cjm_transcription_plugin_whisper
Project Structure
nbs/
├── meta.ipynb # Metadata introspection for the Whisper plugin used by cjm-ctl to generate the registration manifest.
└── plugin.ipynb # Plugin implementation for OpenAI Whisper transcription
Total: 2 notebooks across 1 directory
Module Dependencies
graph LR
meta[meta<br/>Metadata]
plugin[plugin<br/>Whisper Plugin]
plugin --> meta
1 cross-module dependency detected
CLI Reference
No CLI commands found in this project.
Module Overview
Detailed documentation for each module in the project:
Metadata (meta.ipynb)
Metadata introspection for the Whisper plugin used by cjm-ctl to generate the registration manifest.
Import
from cjm_transcription_plugin_whisper.meta import (
get_plugin_metadata
)
Functions
def get_plugin_metadata() -> Dict[str, Any]: # Plugin metadata for manifest generation
"""Return metadata required to register this plugin with the PluginManager."""
# Fallback base path (current behavior for backward compatibility)
base_path = os.path.dirname(os.path.dirname(sys.executable))
# Use CJM config if available, else fallback to env-relative paths
cjm_data_dir = os.environ.get("CJM_DATA_DIR")
cjm_models_dir = os.environ.get("CJM_MODELS_DIR")
# Plugin data directory
plugin_name = "cjm-transcription-plugin-whisper"
if cjm_data_dir
"Return metadata required to register this plugin with the PluginManager."
Whisper Plugin (plugin.ipynb)
Plugin implementation for OpenAI Whisper transcription
Import
from cjm_transcription_plugin_whisper.plugin import (
WhisperPluginConfig,
WhisperLocalPlugin
)
Classes
@dataclass
class WhisperPluginConfig:
"Configuration for Whisper transcription plugin."
model: str = field(...)
device: str = field(...)
language: Optional[str] = field(...)
task: str = field(...)
temperature: float = field(...)
temperature_increment_on_fallback: Optional[float] = field(...)
beam_size: int = field(...)
best_of: int = field(...)
patience: float = field(...)
length_penalty: Optional[float] = field(...)
suppress_tokens: str = field(...)
initial_prompt: Optional[str] = field(...)
condition_on_previous_text: bool = field(...)
fp16: bool = field(...)
compression_ratio_threshold: float = field(...)
logprob_threshold: float = field(...)
no_speech_threshold: float = field(...)
word_timestamps: bool = field(...)
prepend_punctuations: str = field(...)
append_punctuations: str = field(...)
threads: int = field(...)
model_dir: Optional[str] = field(...)
compile_model: bool = field(...)
class WhisperLocalPlugin:
def __init__(self):
"""Initialize the Whisper plugin with default configuration."""
self.logger = logging.getLogger(f"{__name__}.{type(self).__name__}")
self.config: WhisperPluginConfig = None
"OpenAI Whisper transcription plugin."
def __init__(self):
"""Initialize the Whisper plugin with default configuration."""
self.logger = logging.getLogger(f"{__name__}.{type(self).__name__}")
self.config: WhisperPluginConfig = None
"Initialize the Whisper plugin with default configuration."
def name(self) -> str: # Plugin name identifier
"""Get the plugin name identifier."""
return "whisper_local"
@property
def version(self) -> str: # Plugin version string
"Get the plugin name identifier."
def version(self) -> str: # Plugin version string
"""Get the plugin version string."""
return "1.0.0"
@property
def supported_formats(self) -> List[str]: # List of supported audio file formats
"Get the plugin version string."
def supported_formats(self) -> List[str]: # List of supported audio file formats
"""Get the list of supported audio file formats."""
return ["wav", "mp3", "flac", "m4a", "ogg", "webm", "mp4", "avi", "mov"]
def get_current_config(self) -> Dict[str, Any]: # Current configuration as dictionary
"Get the list of supported audio file formats."
def get_current_config(self) -> Dict[str, Any]: # Current configuration as dictionary
"""Return current configuration state."""
if not self.config
"Return current configuration state."
def get_config_schema(self) -> Dict[str, Any]: # JSON Schema for configuration
"""Return JSON Schema for UI generation."""
return dataclass_to_jsonschema(WhisperPluginConfig)
@staticmethod
def get_config_dataclass() -> WhisperPluginConfig: # Configuration dataclass
"Return JSON Schema for UI generation."
def get_config_dataclass() -> WhisperPluginConfig: # Configuration dataclass
"""Return dataclass describing the plugin's configuration options."""
return WhisperPluginConfig
def initialize(
self,
config: Optional[Any] = None # Configuration dataclass, dict, or None
) -> None
"Return dataclass describing the plugin's configuration options."
def initialize(
self,
config: Optional[Any] = None # Configuration dataclass, dict, or None
) -> None
"Initialize or re-configure the plugin (idempotent)."
def execute(
self,
audio: Union[str, Path], # Path to the audio file to transcribe
**kwargs # Additional arguments to override config
) -> TranscriptionResult: # Transcription result with text and metadata
"Transcribe audio using Whisper.
`audio` is a path to a decodable audio file; the caller guarantees it is
model-ready (format / sample-rate / channels handled upstream)."
def is_available(self) -> bool: # True if Whisper and its dependencies are available
"""Check if Whisper is available."""
return WHISPER_AVAILABLE
def cleanup(self) -> None
"Check if Whisper is available."
def cleanup(self) -> None:
"""Clean up resources."""
if self.model is not None
"Clean up resources."