cjm-transcription-plugin-voxtral-vllm
Mistral Voxtral plugin for the cjm-transcription-plugin-system library - provides local speech-to-text transcription through vLLM with configurable model selection and parameter control.
Install
pip install cjm_transcription_plugin_voxtral_vllm
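Quick Start
A minimal usage sketch, assuming only the plugin API documented in the Module Overview below; the attribute result.text is an assumption about how TranscriptionResult exposes its text.
    from cjm_transcription_plugin_voxtral_vllm.plugin import VoxtralVLLMPlugin

    plugin = VoxtralVLLMPlugin()
    if plugin.is_available():              # verify vLLM and its dependencies first
        plugin.initialize()                # default configuration
        result = plugin.execute("speech.wav")  # path to an audio file
        print(result.text)                 # assumed attribute on TranscriptionResult
        plugin.cleanup()                   # shut down the managed vLLM server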
Project Structure
nbs/
└── plugin.ipynb # Plugin implementation for Mistral Voxtral transcription through vLLM server
Total: 1 notebook
Module Dependencies
graph LR
plugin[plugin<br/>Voxtral VLLM Plugin]
No cross-module dependencies detected.
CLI Reference
No CLI commands found in this project.
Module Overview
Detailed documentation for each module in the project:
Voxtral VLLM Plugin (plugin.ipynb)
Plugin implementation for Mistral Voxtral transcription through vLLM server
Import
from cjm_transcription_plugin_voxtral_vllm.plugin import (
    VLLMServer,
    VoxtralVLLMPlugin
)
Functions
@patch
def supports_streaming(
    self: VoxtralVLLMPlugin
) -> bool:
    "Check if this plugin supports streaming transcription."
@patch
def execute_stream(
    self: VoxtralVLLMPlugin,
    audio: Union[AudioData, str, Path],  # Audio data or path to audio file
    **kwargs  # Additional plugin-specific parameters
) -> Generator[str, None, TranscriptionResult]:  # Yields text chunks, returns final result
    """
    Stream transcription results chunk by chunk.

    Args:
        audio: Audio data or path to audio file
        **kwargs: Additional plugin-specific parameters

    Yields:
        str: Partial transcription text chunks as they become available

    Returns:
        TranscriptionResult: Final complete transcription with metadata
    """
Classes
class VLLMServer:
    def __init__(
        self,
        model: str = "mistralai/Voxtral-Mini-3B-2507",
        port: int = 8000,
        host: str = "0.0.0.0",
        gpu_memory_utilization: float = 0.85,
        log_level: str = "INFO",  # DEBUG, INFO, WARNING, ERROR
        capture_logs: bool = True,
        **kwargs
    )
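The constructor arguments map onto common vLLM serving options, so a server tuned for a smaller GPU might look like the sketch below; the values are illustrative, not recommendations:
    # Illustrative configuration; every value here is an example, not a default to copy.
    server = VLLMServer(
        model="mistralai/Voxtral-Mini-3B-2507",  # the documented default model
        port=8001,                               # avoid clashing with another local server
        gpu_memory_utilization=0.6,              # leave headroom for other processes
        log_level="WARNING",                     # quieter startup
    )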
def add_log_callback(self, callback: Callable[[str], None]):
    """Add a callback function that will be called for each log line.

    Args:
        callback: Function that takes a log line string as input
    """
    self.log_callbacks.append(callback)
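Since capture_logs is on by default, a callback can forward server output into the host application's own logging. A minimal sketch, assuming the server instance from the example above:
    import logging

    vllm_logger = logging.getLogger("vllm.server")

    # Forward each captured vLLM log line to the standard logging module.
    server.add_log_callback(lambda line: vllm_logger.info(line))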
def _process_log_line(self, line: str)
"Add a callback function that will be called for each log line.
Args:
as input"
callback: Function that takes a log line string
def start(self, wait_for_ready: bool = True, timeout: int = 120, show_progress: bool = True):
    """Start the vLLM server.

    Args:
        wait_for_ready: Wait for server to be ready before returning
        timeout: Maximum time to wait for server to be ready
        show_progress: Show progress indicators during startup
    """
def stop(self):
    """Stop the vLLM server."""
def restart(self):
    """Restart the server."""
    self.stop()
    time.sleep(2)
    self.start()
def is_running(self) -> bool
    "Check if server is running and responsive.

    This method checks both if the process is alive and if the server
    is actually responding to health checks."
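Taken together, start, is_running, and stop support an explicit lifecycle; a try/finally sketch keeps the GPU from staying occupied if transcription fails partway:
    # Lifecycle sketch: always release the server, even on errors.
    server.start(wait_for_ready=True, timeout=120)
    try:
        assert server.is_running()  # process alive and responding to health checks
        ...                         # run transcriptions against the server here
    finally:
        server.stop()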
def get_recent_logs(self, n: int = 100) -> List[str]:
    """Get the most recent n log lines.

    Args:
        n: Number of recent log lines to retrieve

    Returns:
        List of recent log lines
    """
def get_metrics_from_logs(self) -> dict:
    """Parse recent logs to extract performance metrics.

    Returns:
        Dictionary with metrics like throughput, GPU usage, etc.
    """
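The metrics dictionary is parsed from log lines, so it is best treated as a periodic snapshot. A polling sketch; the prompt_throughput key appears in the implementation, while other keys and the tokens/s unit are assumptions:
    import time

    for _ in range(3):  # take a few snapshots, five seconds apart
        metrics = server.get_metrics_from_logs()
        throughput = metrics.get("prompt_throughput", 0.0)
        print(f"prompt throughput: {throughput} tokens/s")
        time.sleep(5)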
def tail_logs(self, follow: bool = True, n: int = 10):
    """Tail the server logs (similar to tail -f).

    Args:
        follow: Continue displaying new logs as they arrive
        n: Number of initial lines to display
    """
class VoxtralVLLMPlugin:
    "Mistral Voxtral transcription plugin via vLLM server."

    def __init__(self):
        """Initialize the Voxtral VLLM plugin with default configuration."""
        self.logger = logging.getLogger(f"{__name__}.{type(self).__name__}")
        self.config = {}
        self.server: Optional[VLLMServer] = None
def name(
    self
) -> str:  # Returns the plugin name
    "Get the plugin name identifier."

def version(
    self
) -> str:  # Returns the plugin version
    "Get the plugin version string."

def supported_formats(
    self
) -> List[str]:  # Returns list of supported audio formats
    "Get the list of supported audio file formats."

def get_config_schema(
    self
) -> Dict[str, Any]:  # Returns the configuration schema dictionary
    "Return configuration schema for Voxtral VLLM."

def get_current_config(
    self
) -> Dict[str, Any]:  # Returns the current configuration dictionary
    "Return current configuration."

def initialize(
    self,
    config: Optional[Dict[str, Any]] = None  # Configuration dictionary to initialize the plugin
) -> None
    "Initialize the plugin with configuration."

def execute(
    self,
    audio: Union[AudioData, str, Path],  # Audio data or path to audio file to transcribe
    **kwargs  # Additional arguments to override config
) -> TranscriptionResult:  # Returns transcription result with text and metadata
    "Transcribe audio using Voxtral via vLLM."
def is_available(
    self
) -> bool:  # Returns True if vLLM and its dependencies are available
    "Check if vLLM and required dependencies are available."

def cleanup(
    self
) -> None
    "Clean up resources."