Silero VAD Plugin

Plugin implementation for Voice Activity Detection using Silero VAD with SQLite result caching.

Configuration

The SileroVADConfig dataclass defines all configurable parameters for the VAD model.


SileroVADConfig


def SileroVADConfig(
    threshold:float=0.5, min_speech_duration_ms:int=250, min_silence_duration_ms:int=100, speech_pad_ms:int=30,
    sampling_rate:int=16000, use_onnx:bool=True
)->None:

Configuration for Silero VAD parameters.

Plugin Implementation

The SileroVADPlugin implements MediaAnalysisPlugin to provide voice activity detection.


SileroVADPlugin


def SileroVADPlugin(
    
):

Voice Activity Detection plugin using Silero VAD.

Testing the Plugin

# Test basic functionality: construct the plugin with no constructor arguments
plugin = SileroVADPlugin()

# Check availability, then print the plugin's identifying attributes
# (name, version, supported media types) and the name of its config class.
# NOTE(review): is_available() presumably reports whether the Silero VAD
# backend can be loaded in this environment — confirm against the plugin source.
print(f"Silero VAD available: {plugin.is_available()}")
print(f"Plugin name: {plugin.name}")
print(f"Plugin version: {plugin.version}")
print(f"Supported media types: {plugin.supported_media_types}")
print(f"Config class: {plugin.config_class.__name__}")
Silero VAD available: True
Plugin name: silero-vad
Plugin version: 1.0.0
Supported media types: ['audio', 'video']
Config class: SileroVADConfig
# List every SileroVADConfig field with its display title and default value
from dataclasses import fields

print("Configuration fields:")
for spec in fields(SileroVADConfig):
    # Use the schema title from the field metadata, falling back to the raw
    # field name when no title is present.
    label = spec.metadata.get(SCHEMA_TITLE, spec.name)
    print(f"  {label}: {spec.default}")
Configuration fields:
  Threshold: 0.5
  Min Speech Duration (ms): 250
  Min Silence Duration (ms): 100
  Speech Padding (ms): 30
  Sampling Rate: 16000
  Use ONNX: True
# Exercise config validation with one plain config and two out-of-spec ones
test_configs = [
    ({"threshold": 0.5}, "Valid config"),
    ({"threshold": 1.5}, "Threshold out of range"),
    ({"sampling_rate": 44100}, "Invalid sampling rate"),
]

for overrides, label in test_configs:
    try:
        test_cfg = dict_to_config(SileroVADConfig, overrides, validate=True)
    except ValueError as err:
        print(f"{label}: Valid=False")
        # Only the first 80 characters of the validation message are shown
        print(f"  Error: {str(err)[:80]}")
    else:
        # Reached only when dict_to_config did not raise ValueError
        print(f"{label}: Valid=True")
Valid config: Valid=True
Threshold out of range: Valid=False
  Error: threshold: 1.5 is greater than maximum 1.0
Invalid sampling rate: Valid=False
  Error: sampling_rate: 44100 is not one of [8000, 16000]
# Fetch the config schema from the plugin (used for UI generation) and
# print its name, its property count, and a preview of the first properties
import json

schema = plugin.get_config_schema()
print("JSON Schema for SileroVADConfig:")
print(f"  Name: {schema['name']}")
print(f"  Properties count: {len(schema['properties'])}")
print("\nSample properties:")
# Preview only the first three properties, in the schema's own order
print(
    json.dumps(
        dict(list(schema['properties'].items())[:3]),
        indent=2,
    )
)
JSON Schema for SileroVADConfig:
  Name: SileroVADConfig
  Properties count: 6

Sample properties:
{
  "threshold": {
    "type": "number",
    "title": "Threshold",
    "description": "Speech probability threshold (0.0 - 1.0). Higher values reduce false positives.",
    "minimum": 0.0,
    "maximum": 1.0,
    "default": 0.5
  },
  "min_speech_duration_ms": {
    "type": "integer",
    "title": "Min Speech Duration (ms)",
    "description": "Segments shorter than this will be ignored.",
    "minimum": 0,
    "default": 250
  },
  "min_silence_duration_ms": {
    "type": "integer",
    "title": "Min Silence Duration (ms)",
    "description": "Silence shorter than this will not split segments.",
    "minimum": 0,
    "default": 100
  }
}