cjm-transcription-plugin-gemini
Google Gemini API plugin for the cjm-transcription-plugin-system library - provides speech-to-text transcription with configurable model selection and parameter control.
Install
pip install cjm_transcription_plugin_gemini
Project Structure
nbs/
└── plugin.ipynb # Plugin implementation for Google Gemini API transcription
Total: 1 notebook across 1 directory
Module Dependencies
graph LR
plugin[plugin<br/>Gemini Plugin]
No cross-module dependencies detected.
CLI Reference
No CLI commands found in this project.
Module Overview
Detailed documentation for each module in the project:
Gemini Plugin (plugin.ipynb)
Plugin implementation for Google Gemini API transcription
Import
from cjm_transcription_plugin_gemini.plugin import (
    GeminiPlugin
)
Functions
@patch
def _get_api_key(
    self:GeminiPlugin
) -> str:  # Returns the API key string
    "Get API key from config or environment."
@patch
def _refresh_available_models(
    self:GeminiPlugin
) -> List[str]:  # Returns list of available model names
    "Fetch and filter available models from Gemini API."
@patch
def _update_max_tokens_for_model(
    self:GeminiPlugin,
    model_name: str  # Model name to update tokens for
) -> None:
    "Update max_output_tokens config based on the model's token limit."
@patch
def update_config(
    self:GeminiPlugin,
    config: Dict[str, Any]  # New configuration values
) -> None:
    "Update plugin configuration, adjusting max_tokens if model changes."
@patch
def _prepare_audio(
    self:GeminiPlugin,
    audio: Union[AudioData, str, Path]  # Audio data object or path to audio file
) -> Tuple[Path, bool]:  # Returns tuple of (processed audio path, whether temp file was created)
    """
    Prepare audio file for upload.

    Returns:
        Tuple of (audio_path, is_temp_file)
    """
@patch
def _upload_audio_file(
    self:GeminiPlugin,
    audio_path: Path  # Path to audio file to upload
) -> Any:  # Returns uploaded file object
    """
    Upload audio file to Gemini API.

    Returns:
        Uploaded file object
    """
@patch
def _delete_uploaded_file(
    self:GeminiPlugin,
    file_name: str  # Name of file to delete
) -> None:
    "Delete an uploaded file from Gemini API."
@patch
def cleanup(
    self:GeminiPlugin
) -> None:
    "Clean up resources."
@patch
def get_available_models(
    self:GeminiPlugin
) -> List[str]:  # Returns list of available model names
    "Get list of available audio-capable models."
@patch
def get_model_info(
    self:GeminiPlugin,
    model_name: Optional[str] = None  # Model name to get info for, defaults to current model
) -> Dict[str, Any]:  # Returns dict with model information
    "Get information about a specific model including token limits."
@patch
def supports_streaming(
    self:GeminiPlugin
) -> bool:  # Returns True if streaming is supported
    """
    Check if this plugin supports streaming transcription.

    Returns:
        bool: True, as Gemini supports streaming transcription
    """
@patch
def execute_stream(
    self:GeminiPlugin,
    audio: Union[AudioData, str, Path],  # Audio data object or path to audio file
    **kwargs  # Additional arguments to override config
) -> Generator[str, None, TranscriptionResult]:  # Yields text chunks, returns final result
    """
    Stream transcription results chunk by chunk.

    This method streams transcription chunks in real-time as they are generated
    by the Gemini API.

    Args:
        audio: Audio data or path to audio file
        **kwargs: Additional plugin-specific parameters

    Yields:
        str: Partial transcription text chunks as they become available

    Returns:
        TranscriptionResult: Final complete transcription with metadata

    Example:
        >>> # Stream transcription chunks in real-time
        >>> for chunk in plugin.execute_stream(audio_file):
        ...     print(chunk, end="", flush=True)
    """
Classes
class GeminiPlugin:
    "Google Gemini API transcription plugin."

    def __init__(self):
        """Initialize the Gemini plugin with default configuration."""
        self.logger = logging.getLogger(f"{__name__}.{type(self).__name__}")
        self.config = {}
        self.client = None
        self.available_models = []
        self.model_token_limits = {}  # Store model name -> output_token_limit mapping
        self.uploaded_files = []  # Track uploaded files for cleanup

    @property
    def name(
        self
    ) -> str:  # Returns the plugin name identifier
        "Return the plugin name identifier."
    def version(
        self
    ) -> str:  # Returns the plugin version string
        "Return the plugin version string."
    def supported_formats(
        self
    ) -> List[str]:  # Returns list of supported audio formats
        "Return list of supported audio file formats."
    def get_config_schema(
        current_model: str="gemini-2.5-flash",
        max_tokens: int=65536,
        available_models: List[str]=None
    ) -> Dict[str, Any]:  # Returns JSON schema for configuration validation
        "Return configuration schema for Gemini."
    def get_current_config(
        self
    ) -> Dict[str, Any]:  # Returns the merged configuration dictionary
        "Return current configuration."
    def initialize(
        self,
        config: Optional[Dict[str, Any]] = None  # Configuration dictionary to override defaults
    ) -> None:
        "Initialize the plugin with configuration."
    def execute(
        self,
        audio: Union[AudioData, str, Path],  # Audio data object or path to audio file
        **kwargs  # Additional arguments to override config
    ) -> TranscriptionResult:  # Returns transcription result object
        "Transcribe audio using Gemini."
    def is_available(
        self
    ) -> bool:  # Returns True if the Gemini API is available
        "Check if Gemini API is available."