Voxtral HF Plugin

Plugin implementation for Mistral Voxtral transcription via Hugging Face Transformers.

source

VoxtralHFPlugin


def VoxtralHFPlugin(
    
):

Mistral Voxtral transcription plugin via Hugging Face Transformers.


source

VoxtralHFPluginConfig


def VoxtralHFPluginConfig(
    model_id:str='mistralai/Voxtral-Mini-3B-2507', device:str='auto', dtype:str='auto', language:Optional='en',
    max_new_tokens:int=25000, do_sample:bool=False, temperature:float=1.0, top_p:float=0.95, streaming:bool=False,
    trust_remote_code:bool=False, cache_dir:Optional=None, compile_model:bool=False, load_in_8bit:bool=False,
    load_in_4bit:bool=False
)->None:

Configuration for Voxtral HF transcription plugin.

Streaming Support

The execute_stream method provides real-time transcription output:

  • Yields: Partial transcription text chunks as they become available during generation
  • Returns: Final TranscriptionResult with complete text and metadata after streaming completes
  • Parameters: Same as execute() - accepts AudioData, file path string, or Path object, plus optional kwargs to override config
  • Usage: Iterate over the generator to receive text chunks, then access the return value for the final result

source

VoxtralHFPlugin.execute_stream


def execute_stream(
    audio:Union, # Audio data or path to audio file
    kwargs:VAR_KEYWORD
)->Generator: # Yields text chunks, returns final result

Stream transcription results chunk by chunk.


source

VoxtralHFPlugin.supports_streaming


def supports_streaming(
    
)->bool: # True if streaming is supported

Check if this plugin supports streaming transcription.

Testing the Plugin

# Test basic functionality
plugin = VoxtralHFPlugin()

# Check availability
print(f"Voxtral available: {plugin.is_available()}")
print(f"Plugin name: {plugin.name}")
print(f"Plugin version: {plugin.version}")
print(f"Supported formats: {plugin.supported_formats}")
print(f"Config class: {plugin.config_class.__name__}")
print(f"Supports streaming: {plugin.supports_streaming()}")
Voxtral available: True
Plugin name: voxtral_hf
Plugin version: 1.0.0
Supported formats: ['wav', 'mp3', 'flac', 'm4a', 'ogg', 'webm', 'mp4', 'avi', 'mov']
Config class: VoxtralHFPluginConfig
Supports streaming: True
# Test configuration dataclass
from dataclasses import fields

print("Available models:")
model_field = next(f for f in fields(VoxtralHFPluginConfig) if f.name == "model_id")
for model in model_field.metadata.get(SCHEMA_ENUM, []):
    print(f"  - {model}")
Available models:
  - mistralai/Voxtral-Mini-3B-2507
  - mistralai/Voxtral-Small-24B-2507
# Test configuration validation
test_configs = [
    ({"model_id": "mistralai/Voxtral-Mini-3B-2507"}, "Valid config"),
    ({"model_id": "invalid_model"}, "Invalid model"),
    ({"model_id": "mistralai/Voxtral-Mini-3B-2507", "temperature": 2.5}, "Temperature out of range"),
]

for config, description in test_configs:
    try:
        test_cfg = dict_to_config(VoxtralHFPluginConfig, config, validate=True)
        print(f"{description}: Valid=True")
    except ValueError as e:
        print(f"{description}: Valid=False")
        print(f"  Error: {str(e)[:100]}")
Valid config: Valid=True
Invalid model: Valid=False
  Error: model_id: 'invalid_model' is not one of ['mistralai/Voxtral-Mini-3B-2507', 'mistralai/Voxtral-Small-
Temperature out of range: Valid=False
  Error: temperature: 2.5 is greater than maximum 2.0
# Test initialization and get_current_config (returns a dict)
plugin.initialize({"model_id": "mistralai/Voxtral-Mini-3B-2507", "device": "cpu"})
current_config = plugin.get_current_config()
print(f"Current config (dict): model_id={current_config['model_id']}")
Current config (dict): model_id=mistralai/Voxtral-Mini-3B-2507
# Test get_config_schema for UI generation
import json

schema = plugin.get_config_schema()
print("JSON Schema for VoxtralHFPluginConfig:")
print(f"  Name: {schema['name']}")
print(f"  Properties count: {len(schema['properties'])}")
print(f"  Model field enum: {schema['properties']['model_id'].get('enum', [])}")
print("\nSample properties:")
print(json.dumps({k: v for k, v in list(schema['properties'].items())[:3]}, indent=2))
JSON Schema for VoxtralHFPluginConfig:
  Name: VoxtralHFPluginConfig
  Properties count: 14
  Model field enum: ['mistralai/Voxtral-Mini-3B-2507', 'mistralai/Voxtral-Small-24B-2507']

Sample properties:
{
  "model_id": {
    "type": "string",
    "title": "Model ID",
    "description": "Voxtral model to use. Mini is faster, Small is more accurate.",
    "enum": [
      "mistralai/Voxtral-Mini-3B-2507",
      "mistralai/Voxtral-Small-24B-2507"
    ],
    "default": "mistralai/Voxtral-Mini-3B-2507"
  },
  "device": {
    "type": "string",
    "title": "Device",
    "description": "Device for inference (auto will use CUDA if available)",
    "enum": [
      "auto",
      "cpu",
      "cuda"
    ],
    "default": "auto"
  },
  "dtype": {
    "type": "string",
    "title": "Data Type",
    "description": "Data type for model weights (auto will use bfloat16 on GPU, float32 on CPU)",
    "enum": [
      "auto",
      "bfloat16",
      "float16",
      "float32"
    ],
    "default": "auto"
  }
}