cjm-transcription-plugin-voxtral-vllm

Mistral Voxtral plugin for the cjm-transcription-plugin-system library - provides local speech-to-text transcription through vLLM with configurable model selection and parameter control.

Install

pip install cjm_transcription_plugin_voxtral_vllm

Project Structure

nbs/
└── plugin.ipynb # Plugin implementation for Mistral Voxtral transcription through vLLM server

Total: 1 notebook

Module Dependencies

graph LR
    plugin[plugin<br/>Voxtral VLLM Plugin]

No cross-module dependencies detected.

CLI Reference

No CLI commands found in this project.

Module Overview

Detailed documentation for each module in the project:

Voxtral VLLM Plugin (`plugin.ipynb`)

Plugin implementation for Mistral Voxtral transcription through vLLM server

Import

from cjm_transcription_plugin_voxtral_vllm.plugin import (
    VLLMServer,
    VoxtralVLLMPlugin
)

Functions

@patch
def supports_streaming(
    self: VoxtralVLLMPlugin
) -> bool
    "Check if this plugin supports streaming transcription."

@patch
def execute_stream(
    self: VoxtralVLLMPlugin,
    audio: Union[AudioData, str, Path],  # Audio data or path to audio file
    **kwargs  # Additional plugin-specific parameters
) -> Generator[str, None, TranscriptionResult]:  # Yields text chunks, returns final result
    """
    Stream transcription results chunk by chunk.
    
    Args:
        audio: Audio data or path to audio file
        **kwargs: Additional plugin-specific parameters
        
    Yields:
        str: Partial transcription text chunks as they become available
        
    Returns:
        TranscriptionResult: Final complete transcription with metadata
    """

Classes

class VLLMServer:
    def __init__(
        self,
        model: str = "mistralai/Voxtral-Mini-3B-2507",
        port: int = 8000,
        host: str = "0.0.0.0",
        gpu_memory_utilization: float = 0.85,
        log_level: str = "INFO",  # DEBUG, INFO, WARNING, ERROR
        capture_logs: bool = True,
        **kwargs
    )
    
    def add_log_callback(self, callback: Callable[[str], None]):
        """Add a callback function that will be called for each log line.
        
        Args:
            callback: Function that takes a log line string as input
        """
        self.log_callbacks.append(callback)
    
    def _process_log_line(self, line: str)
    
    def start(self, wait_for_ready: bool = True, timeout: int = 120, show_progress: bool = True):
        """Start the vLLM server.
        
        Args:
            wait_for_ready: Wait for server to be ready before returning
            timeout: Maximum time to wait for server to be ready
            show_progress: Show progress indicators during startup
        """
    
    def stop(self):
        """Stop the vLLM server."""
    
    def restart(self):
        """Restart the server."""
        self.stop()
        time.sleep(2)
        self.start()
    
    def is_running(self) -> bool
        "Check if server is running and responsive.

This method checks both if the process is alive and if the server
is actually responding to health checks."
    
    def get_recent_logs(self, n: int = 100) -> List[str]:
        """Get the most recent n log lines.
        
        Args:
            n: Number of recent log lines to retrieve
            
        Returns:
            List of recent log lines
        """
    
    def get_metrics_from_logs(self) -> dict:
        """Parse recent logs to extract performance metrics.
        
        Returns:
            Dictionary with metrics like throughput, GPU usage, etc.
        """
    
    def tail_logs(self, follow: bool = True, n: int = 10):
        """Tail the server logs (similar to tail -f).
        
        Args:
            follow: Continue displaying new logs as they arrive
            n: Number of initial lines to display
        """

class VoxtralVLLMPlugin:
    "Mistral Voxtral transcription plugin via vLLM server."
    
    def __init__(self):
        """Initialize the Voxtral VLLM plugin with default configuration."""
        self.logger = logging.getLogger(f"{__name__}.{type(self).__name__}")
        self.config = {}
        self.server: Optional[VLLMServer] = None
    
    def name(
            self
        ) -> str:  # Returns the plugin name
        "Get the plugin name identifier."
    
    def version(
            self
        ) -> str:  # Returns the plugin version
        "Get the plugin version string."
    
    def supported_formats(
            self
        ) -> List[str]:  # Returns list of supported audio formats
        "Get the list of supported audio file formats."
    
    def get_config_schema(
        ) -> Dict[str, Any]:  # Returns the configuration schema dictionary
        "Return configuration schema for Voxtral VLLM."
    
    def get_current_config(
            self
        ) -> Dict[str, Any]:  # Returns the current configuration dictionary
        "Return current configuration."
    
    def initialize(
            self,
            config: Optional[Dict[str, Any]] = None  # Configuration dictionary to initialize the plugin
        ) -> None
        "Initialize the plugin with configuration."
    
    def execute(
            self,
            audio: Union[AudioData, str, Path],  # Audio data or path to audio file to transcribe
            **kwargs  # Additional arguments to override config
        ) -> TranscriptionResult:  # Returns transcription result with text and metadata
        "Transcribe audio using Voxtral via vLLM."
    
    def is_available(
            self
        ) -> bool:  # Returns True if vLLM and its dependencies are available
        "Check if vLLM and required dependencies are available."
    
    def cleanup(
            self
        ) -> None
        "Clean up resources."