cjm-transcription-plugin-voxtral-hf
Mistral Voxtral plugin for the cjm-transcription-plugin-system library - provides local speech-to-text transcription through 🤗 Transformers with configurable model selection and parameter control.
Install
pip install cjm_transcription_plugin_voxtral_hf
Project Structure
nbs/
├── meta.ipynb # Metadata introspection for the Voxtral HF plugin used by cjm-ctl to generate the registration manifest.
└── plugin.ipynb # Plugin implementation for Mistral Voxtral transcription through Hugging Face Transformers
Total: 2 notebooks
Module Dependencies
graph LR
meta[meta<br/>Metadata]
plugin[plugin<br/>Voxtral HF Plugin]
plugin --> meta
1 cross-module dependency detected
CLI Reference
No CLI commands found in this project.
Module Overview
Detailed documentation for each module in the project:
Metadata (meta.ipynb)
Metadata introspection for the Voxtral HF plugin used by cjm-ctl to generate the registration manifest.
Import
from cjm_transcription_plugin_voxtral_hf.meta import (
get_plugin_metadata
)
Functions
def get_plugin_metadata() -> Dict[str, Any]: # Plugin metadata for manifest generation
"""Return metadata required to register this plugin with the PluginManager."""
# Fallback base path (current behavior for backward compatibility)
base_path = os.path.dirname(os.path.dirname(sys.executable))
# Use CJM config if available, else fallback to env-relative paths
cjm_data_dir = os.environ.get("CJM_DATA_DIR")
cjm_models_dir = os.environ.get("CJM_MODELS_DIR")
# Plugin data directory
plugin_name = "cjm-transcription-plugin-voxtral-hf"
if cjm_data_dir
"Return metadata required to register this plugin with the PluginManager."
Voxtral HF Plugin (plugin.ipynb)
Plugin implementation for Mistral Voxtral transcription through Hugging Face Transformers
Import
from cjm_transcription_plugin_voxtral_hf.plugin import (
VoxtralHFPluginConfig,
VoxtralHFPlugin
)
Functions
@patch
def supports_streaming(
self:VoxtralHFPlugin
) -> bool: # True if streaming is supported
"Check if this plugin supports streaming transcription."
@patch
def execute_stream(
self:VoxtralHFPlugin,
audio: Union[AudioData, str, Path], # Audio data or path to audio file
**kwargs # Additional plugin-specific parameters
) -> Generator[str, None, TranscriptionResult]: # Yields text chunks, returns final result
"Stream transcription results chunk by chunk."
Classes
@dataclass
class VoxtralHFPluginConfig:
"Configuration for Voxtral HF transcription plugin."
model_id: str = field(...)
device: str = field(...)
dtype: str = field(...)
language: Optional[str] = field(...)
max_new_tokens: int = field(...)
do_sample: bool = field(...)
temperature: float = field(...)
top_p: float = field(...)
streaming: bool = field(...)
trust_remote_code: bool = field(...)
cache_dir: Optional[str] = field(...)
compile_model: bool = field(...)
load_in_8bit: bool = field(...)
load_in_4bit: bool = field(...)
class VoxtralHFPlugin:
def __init__(self):
"""Initialize the Voxtral HF plugin with default configuration."""
self.logger = logging.getLogger(f"{__name__}.{type(self).__name__}")
self.config: VoxtralHFPluginConfig = None
"Mistral Voxtral transcription plugin via Hugging Face Transformers."
def __init__(self):
"""Initialize the Voxtral HF plugin with default configuration."""
self.logger = logging.getLogger(f"{__name__}.{type(self).__name__}")
self.config: VoxtralHFPluginConfig = None
"Initialize the Voxtral HF plugin with default configuration."
def name(self) -> str: # Plugin name identifier
"""Get the plugin name identifier."""
return "voxtral_hf"
@property
def version(self) -> str: # Plugin version string
"Get the plugin name identifier."
def version(self) -> str: # Plugin version string
"""Get the plugin version string."""
return "1.0.0"
@property
def supported_formats(self) -> List[str]: # List of supported audio formats
"Get the plugin version string."
def supported_formats(self) -> List[str]: # List of supported audio formats
"""Get the list of supported audio file formats."""
return ["wav", "mp3", "flac", "m4a", "ogg", "webm", "mp4", "avi", "mov"]
def get_current_config(self) -> Dict[str, Any]: # Current configuration as dictionary
"Get the list of supported audio file formats."
def get_current_config(self) -> Dict[str, Any]: # Current configuration as dictionary
"""Return current configuration state."""
if not self.config
"Return current configuration state."
def get_config_schema(self) -> Dict[str, Any]: # JSON Schema for configuration
"""Return JSON Schema for UI generation."""
return dataclass_to_jsonschema(VoxtralHFPluginConfig)
@staticmethod
def get_config_dataclass() -> VoxtralHFPluginConfig: # Configuration dataclass
"Return JSON Schema for UI generation."
def get_config_dataclass() -> VoxtralHFPluginConfig: # Configuration dataclass
"""Return dataclass describing the plugin's configuration options."""
return VoxtralHFPluginConfig
def initialize(
self,
config: Optional[Any] = None # Configuration dataclass, dict, or None
) -> None
"Return dataclass describing the plugin's configuration options."
def initialize(
self,
config: Optional[Any] = None # Configuration dataclass, dict, or None
) -> None
"Initialize or re-configure the plugin (idempotent)."
def execute(
self,
audio: Union[AudioData, str, Path], # Audio data or path to audio file to transcribe
**kwargs # Additional arguments to override config
) -> TranscriptionResult: # Transcription result with text and metadata
"Transcribe audio using Voxtral."
def is_available(self) -> bool: # True if Voxtral and its dependencies are available
"""Check if Voxtral is available."""
return VOXTRAL_AVAILABLE
def cleanup(self) -> None
"Check if Voxtral is available."
def cleanup(self) -> None:
"""Clean up resources with aggressive memory management."""
if self.model is None and self.processor is None
"Clean up resources with aggressive memory management."