cjm-transcription-plugin-gemini

Google Gemini API plugin for the cjm-transcription-plugin-system library - provides speech-to-text transcription with configurable model selection and parameter control.

Install

pip install cjm_transcription_plugin_gemini

Project Structure

nbs/
└── plugin.ipynb # Plugin implementation for Google Gemini API transcription

Total: 1 notebook across 1 directory

Module Dependencies

graph LR
    plugin[plugin<br/>Gemini Plugin]

No cross-module dependencies detected.

CLI Reference

No CLI commands found in this project.

Module Overview

Detailed documentation for each module in the project:

Gemini Plugin (`plugin.ipynb`)

Plugin implementation for Google Gemini API transcription

Import

from cjm_transcription_plugin_gemini.plugin import (
    GeminiPlugin
)

Functions

@patch
def _get_api_key(
    self:GeminiPlugin
) -> str:  # Returns the API key string
    "Get API key from config or environment."

@patch
def _refresh_available_models(
    self:GeminiPlugin
) -> List[str]:  # Returns list of available model names
    "Fetch and filter available models from Gemini API."

@patch
def _update_max_tokens_for_model(
    self:GeminiPlugin,
    model_name: str  # Model name to update tokens for
) -> None:
    "Update max_output_tokens config based on the model's token limit."

@patch
def update_config(
    self:GeminiPlugin,
    config: Dict[str, Any]  # New configuration values
) -> None:
    "Update plugin configuration, adjusting max_tokens if model changes."

@patch
def _prepare_audio(
    self:GeminiPlugin,
    audio: Union[AudioData, str, Path]  # Audio data object or path to audio file
) -> Tuple[Path, bool]:  # Returns tuple of (processed audio path, whether temp file was created)
    """
    Prepare audio file for upload.
    
    Returns:
        Tuple of (audio_path, is_temp_file)
    """

@patch
def _upload_audio_file(
    self:GeminiPlugin,
    audio_path: Path  # Path to audio file to upload
) -> Any:  # Returns uploaded file object
    """
    Upload audio file to Gemini API.
    
    Returns:
        Uploaded file object
    """

@patch
def _delete_uploaded_file(
    self:GeminiPlugin,
    file_name: str  # Name of file to delete
) -> None:
    "Delete an uploaded file from Gemini API."

@patch
def cleanup(
    self:GeminiPlugin
) -> None:
    "Clean up resources."

@patch
def get_available_models(
    self:GeminiPlugin
) -> List[str]:  # Returns list of available model names
    "Get list of available audio-capable models."

@patch
def get_model_info(
    self:GeminiPlugin,
    model_name: Optional[str] = None  # Model name to get info for, defaults to current model
) -> Dict[str, Any]:  # Returns dict with model information
    "Get information about a specific model including token limits."

@patch
def supports_streaming(
    self:GeminiPlugin
) -> bool:  # Returns True if streaming is supported
    """
    Check if this plugin supports streaming transcription.
    
    Returns:
        bool: True, as Gemini supports streaming transcription
    """

@patch
def execute_stream(
    self:GeminiPlugin,
    audio: Union[AudioData, str, Path],  # Audio data object or path to audio file
    **kwargs  # Additional arguments to override config
) -> Generator[str, None, TranscriptionResult]:  # Yields text chunks, returns final result
    """
    Stream transcription results chunk by chunk.
    
    This method streams transcription chunks in real-time as they are generated
    by the Gemini API.
    
    Args:
        audio: Audio data or path to audio file
        **kwargs: Additional plugin-specific parameters
        
    Yields:
        str: Partial transcription text chunks as they become available
        
    Returns:
        TranscriptionResult: Final complete transcription with metadata
        
    Example:
        >>> # Stream transcription chunks in real-time
        >>> for chunk in plugin.execute_stream(audio_file):
        ...     print(chunk, end="", flush=True)
    """

Classes

class GeminiPlugin:
    "Google Gemini API transcription plugin."
    
    def __init__(self):
        """Initialize the Gemini plugin with default configuration."""
        self.logger = logging.getLogger(f"{__name__}.{type(self).__name__}")
        self.config = {}
        self.client = None
        self.available_models = []
        self.model_token_limits = {}  # Store model name -> output_token_limit mapping
        self.uploaded_files = []  # Track uploaded files for cleanup
    
    @property
    def name(
            self
        ) -> str:  # Returns the plugin name identifier
        "Return the plugin name identifier."
    
    def version(
            self
        ) -> str:  # Returns the plugin version string
        "Return the plugin version string."
    
    def supported_formats(
            self
        ) -> List[str]:  # Returns list of supported audio formats
        "Return list of supported audio file formats."
    
    def get_config_schema(
            current_model: str="gemini-2.5-flash",
            max_tokens: int=65536,
            available_models: List[str]=None
        ) -> Dict[str, Any]:  # Returns JSON schema for configuration validation
        "Return configuration schema for Gemini."
    
    def get_current_config(
            self
        ) -> Dict[str, Any]:  # Returns the merged configuration dictionary
        "Return current configuration."
    
    def initialize(
            self,
            config: Optional[Dict[str, Any]] = None  # Configuration dictionary to override defaults
        ) -> None:
        "Initialize the plugin with configuration."
    
    def execute(
            self,
            audio: Union[AudioData, str, Path],  # Audio data object or path to audio file
            **kwargs # Additional arguments to override config
        ) -> TranscriptionResult:  # Returns transcription result object
        "Transcribe audio using Gemini."
    
    def is_available(
            self
        ) -> bool:  # Returns True if the Gemini API is available
        "Check if Gemini API is available."