cjm-transcript-segmentation
Install
pip install cjm_transcript_segmentation
Project Structure
nbs/
├── components/ (6)
│ ├── callbacks.ipynb # JavaScript callback generators for Phase 2 segmentation keyboard interaction
│ ├── card_stack_config.ipynb # Card stack configuration constants for the Phase 2 segmentation UI
│ ├── helpers.ipynb # Shared helper functions for the segmentation module
│ ├── keyboard_config.ipynb # Segmentation-specific keyboard actions, modes, and zone configuration
│ ├── segment_card.ipynb # Segment card component with view and split modes
│ └── step_renderer.ipynb # Composable renderers for the Phase 2 segmentation column and shared chrome
├── routes/ (4)
│ ├── card_stack.ipynb # Card stack UI operations — navigation, viewport, mode switching, and response builders
│ ├── core.ipynb # Segmentation step state management helpers
│ ├── handlers.ipynb # Segmentation workflow handlers — init, split, merge, undo, reset, AI split
│ └── init.ipynb # Router assembly for Phase 2 segmentation routes
├── services/ (1)
│ └── segmentation.ipynb # Segmentation service for text decomposition via NLTK plugin
├── html_ids.ipynb # HTML ID constants for Phase 2 Left Column: Text Segmentation
├── models.ipynb # Data models and URL bundles for Phase 2 Left Column: Text Segmentation
└── utils.ipynb # Text processing utilities for segmentation: word counting, position mapping, and statistics
Total: 14 notebooks across 3 directories
Module Dependencies
graph LR
components_callbacks[components.callbacks<br/>callbacks]
components_card_stack_config[components.card_stack_config<br/>card_stack_config]
components_helpers[components.helpers<br/>helpers]
components_keyboard_config[components.keyboard_config<br/>keyboard_config]
components_segment_card[components.segment_card<br/>segment_card]
components_step_renderer[components.step_renderer<br/>step_renderer]
html_ids[html_ids<br/>html_ids]
models[models<br/>models]
routes_card_stack[routes.card_stack<br/>card_stack]
routes_core[routes.core<br/>core]
routes_handlers[routes.handlers<br/>handlers]
routes_init[routes.init<br/>init]
services_segmentation[services.segmentation<br/>segmentation]
utils[utils<br/>utils]
components_helpers --> models
components_keyboard_config --> components_card_stack_config
components_segment_card --> models
components_segment_card --> html_ids
components_segment_card --> components_card_stack_config
components_step_renderer --> utils
components_step_renderer --> models
components_step_renderer --> components_callbacks
components_step_renderer --> html_ids
components_step_renderer --> components_segment_card
components_step_renderer --> components_card_stack_config
routes_card_stack --> routes_core
routes_card_stack --> utils
routes_card_stack --> models
routes_card_stack --> components_step_renderer
routes_card_stack --> components_segment_card
routes_card_stack --> components_card_stack_config
routes_core --> models
routes_handlers --> routes_core
routes_handlers --> models
routes_handlers --> utils
routes_handlers --> services_segmentation
routes_handlers --> components_step_renderer
routes_handlers --> html_ids
routes_handlers --> components_card_stack_config
routes_handlers --> routes_card_stack
routes_init --> routes_handlers
routes_init --> services_segmentation
routes_init --> models
routes_init --> routes_core
routes_init --> routes_card_stack
services_segmentation --> models
utils --> models
33 cross-module dependencies detected
CLI Reference
No CLI commands found in this project.
Module Overview
Detailed documentation for each module in the project:
callbacks (callbacks.ipynb)
JavaScript callback generators for Phase 2 segmentation keyboard interaction
Import
from cjm_transcript_segmentation.components.callbacks import (
generate_seg_callbacks_script
)
Functions
def _generate_focus_change_script(
focus_input_id: str, # ID of hidden input for focused segment index
) -> str: # JavaScript code for focus change callback
"Generate JavaScript for card focus change handling."def generate_seg_callbacks_script(
ids:CardStackHtmlIds, # Card stack HTML IDs
button_ids:CardStackButtonIds, # Card stack button IDs
config:CardStackConfig, # Card stack configuration
urls:CardStackUrls, # Card stack URL bundle
container_id:str, # ID of the segmentation container (parent of card stack)
focus_input_id:str, # ID of hidden input for focused segment index
) -> any: # Script element with all JavaScript callbacks
"""
Generate JavaScript for segmentation keyboard interaction.
Delegates card-stack-generic JS to the library and injects the
focus change callback via extra_scripts.
"""
card_stack (card_stack.ipynb)
Card stack UI operations — navigation, viewport, mode switching, and response builders
Import
from cjm_transcript_segmentation.routes.card_stack import (
init_card_stack_router
)
Functions
def _make_renderer(
urls: SegmentationUrls, # URL bundle
is_split_mode: bool = False, # Whether split mode is active
caret_position: int = 0, # Caret position for split mode
source_boundaries: Set[int] = None, # Indices where source_id changes
) -> Any: # Card renderer callback
"Create a segment card renderer with captured URLs and mode state."def _build_slots_oob(
segment_dicts: List[Dict[str, Any]], # Serialized segments
state: CardStackState, # Card stack viewport state
urls: SegmentationUrls, # URL bundle
caret_position: int = 0, # Caret position for split mode
) -> List[Any]: # OOB slot elements
"Build OOB slot updates for the viewport sections."def _build_nav_response(
segment_dicts: List[Dict[str, Any]], # Serialized segments
state: CardStackState, # Card stack viewport state
urls: SegmentationUrls, # URL bundle
caret_position: int = 0, # Caret position for split mode
) -> Tuple: # OOB elements (slots + progress + focus)
"Build OOB response for navigation and mode changes."def _handle_seg_navigate(
state_store: WorkflowStateStore, # The workflow state store
workflow_id: str, # The workflow identifier
sess, # FastHTML session object
direction: str, # Navigation direction: "up", "down", "first", "last", "page_up", "page_down"
urls: SegmentationUrls, # URL bundle for segmentation routes
): # OOB slot updates with progress, focus, and source position
"Navigate to a different segment in the viewport using OOB slot swaps."def _handle_seg_navigate_to_index(
state_store: WorkflowStateStore, # The workflow state store
workflow_id: str, # The workflow identifier
sess, # FastHTML session object
target_index: int, # Target segment index to navigate to
urls: SegmentationUrls, # URL bundle for segmentation routes
): # OOB slot updates with progress, focus, and source position
"Navigate to a specific segment index in the viewport using OOB slot swaps."def _handle_seg_enter_split_mode(
state_store: WorkflowStateStore, # The workflow state store
workflow_id: str, # The workflow identifier
request, # FastHTML request object
sess, # FastHTML session object
segment_index: int, # Index of segment to enter split mode for
urls: SegmentationUrls, # URL bundle for segmentation routes
): # OOB slot updates with split mode active for focused segment
"Enter split mode for a specific segment."def _handle_seg_exit_split_mode(
state_store: WorkflowStateStore, # The workflow state store
workflow_id: str, # The workflow identifier
request, # FastHTML request object
sess, # FastHTML session object
urls: SegmentationUrls, # URL bundle for segmentation routes
): # OOB slot updates with split mode deactivated
"Exit split mode."async def _handle_seg_update_viewport(
state_store: WorkflowStateStore, # The workflow state store
workflow_id: str, # The workflow identifier
request, # FastHTML request object
sess, # FastHTML session object
visible_count: int, # New number of visible cards
urls: SegmentationUrls, # URL bundle for segmentation routes
): # Full viewport component (outerHTML swap)
"""
Update the viewport with a new card count.
Does a full viewport swap because the number of slots changes.
Saves the new visible_count and is_auto_mode to state.
"""def _handle_seg_save_width(
state_store: WorkflowStateStore, # The workflow state store
workflow_id: str, # The workflow identifier
sess, # FastHTML session object
card_width: int, # Card stack width in rem
) -> None: # No response body (swap=none on client)
"""
Save the card stack width to server state.
Called via debounced HTMX POST from the width slider.
Returns nothing since the client uses hx-swap='none'.
"""def init_card_stack_router(
state_store: WorkflowStateStore, # The workflow state store
workflow_id: str, # The workflow identifier
prefix: str, # Route prefix (e.g., "/workflow/seg/card_stack")
urls: SegmentationUrls, # URL bundle (populated after routes defined)
) -> Tuple[APIRouter, Dict[str, Callable]]: # (router, route_dict)
"Initialize card stack routes for segmentation."
card_stack_config (card_stack_config.ipynb)
Card stack configuration constants for the Phase 2 segmentation UI
Import
from cjm_transcript_segmentation.components.card_stack_config import (
SEG_CS_CONFIG,
SEG_CS_IDS,
SEG_CS_BTN_IDS,
SEG_TS_CONFIG,
SEG_TS_IDS
)
Variables
SEG_CS_CONFIG
SEG_CS_IDS
SEG_CS_BTN_IDS
SEG_TS_CONFIG
SEG_TS_IDS
core (core.ipynb)
Segmentation step state management helpers
Import
from cjm_transcript_segmentation.routes.core import (
WorkflowStateStore,
DEBUG_SEG_STATE,
DEFAULT_MAX_HISTORY_DEPTH,
SegContext
)
Functions
def _to_segments(
segment_dicts: List[Dict[str, Any]] # Serialized segment dictionaries
) -> List[TextSegment]: # Deserialized TextSegment objects
"Convert segment dictionaries to TextSegment objects."def _get_seg_state(
state_store: WorkflowStateStore, # The workflow state store
workflow_id: str, # The workflow identifier
session_id: str # Session identifier string
) -> SegmentationStepState: # Segmentation step state dictionary
"Get the segmentation step state from the workflow state store."def _get_selection_state(
state_store: WorkflowStateStore, # The workflow state store
workflow_id: str, # The workflow identifier
session_id: str # Session identifier string
) -> Dict[str, Any]: # Selection step state dictionary
"Get the selection step state (Phase 1) from the workflow state store."def _build_card_stack_state(
ctx: SegContext, # Loaded segmentation context
active_mode: str = None, # Active interaction mode (e.g. "split")
) -> CardStackState: # Card stack state for library functions
"Build a CardStackState from segmentation context for library calls."def _load_seg_context(
state_store: WorkflowStateStore, # The workflow state store
workflow_id: str, # The workflow identifier
session_id: str # Session identifier string
) -> SegContext: # Common segmentation state values
"Load commonly-needed segmentation state values in a single call."def _update_seg_state(
state_store: WorkflowStateStore, # The workflow state store
workflow_id: str, # The workflow identifier
session_id: str, # Session identifier string
segments: List[Dict[str, Any]] = None, # Updated segments (None = don't change)
initial_segments: List[Dict[str, Any]] = None, # Initial segments for reset (None = don't change)
focused_index: int = None, # Updated focused index (None = don't change)
is_initialized: bool = None, # Initialization flag (None = don't change)
history: List[Dict[str, Any]] = None, # Updated history (None = don't change)
visible_count: int = None, # Visible card count (None = don't change)
is_auto_mode: bool = None, # Auto-adjust mode flag (None = don't change)
card_width: int = None, # Card stack width in rem (None = don't change)
) -> None
"Update the segmentation step state in the workflow state store."def _push_history(
state_store: WorkflowStateStore, # The workflow state store
workflow_id: str, # The workflow identifier
session_id: str, # Session identifier string
current_segments: List[Dict[str, Any]], # Current segments to snapshot
focused_index: int, # Current focused index to snapshot
max_history_depth: int = DEFAULT_MAX_HISTORY_DEPTH, # Maximum history stack depth
) -> int: # New history depth after push
"Push current state to history stack before making changes."
Classes
class SegContext(NamedTuple):
"Common segmentation state values loaded by handlers."
Variables
DEBUG_SEG_STATE = False
DEFAULT_MAX_HISTORY_DEPTH = 50
handlers (handlers.ipynb)
Segmentation workflow handlers — init, split, merge, undo, reset, AI split
Import
from cjm_transcript_segmentation.routes.handlers import (
DEBUG_SEG_HANDLERS,
build_mutation_response,
SegInitResult,
SegMutationResult,
init_workflow_router
)
Functions
def build_mutation_response(
segment_dicts:List[Dict[str, Any]], # Serialized segments
focused_index:int, # Currently focused segment index
visible_count:int, # Number of visible cards
history_depth:int, # Current undo history depth
urls:SegmentationUrls, # URL bundle
is_split_mode:bool=False, # Whether split mode is active
extra_actions:tuple=(), # Additional toolbar elements (e.g., FA controls, sync toggle)
nltk_split_disabled:bool=False, # Whether NLTK Split button is disabled
) -> Tuple: # OOB elements (slots + progress + focus + stats + toolbar + source position)
"""
Build the standard OOB response for mutation handlers.
Returns domain-specific OOB elements. The combined layer wrapper
adds cross-domain elements (mini-stats badge, alignment status).
"""async def _handle_seg_init(
state_store: WorkflowStateStore, # The workflow state store
workflow_id: str, # The workflow identifier
source_service: SourceService, # Service for fetching source blocks
segmentation_service: SegmentationService, # Service for NLTK sentence splitting
request, # FastHTML request object
sess, # FastHTML session object
urls: SegmentationUrls, # URL bundle for segmentation routes
visible_count: int = DEFAULT_VISIBLE_COUNT, # Number of visible cards
card_width: int = DEFAULT_CARD_WIDTH, # Card stack width in rem
) -> SegInitResult: # Pure domain result for wrapper to use
"""
Initialize segments from Phase 1 selected sources.
Returns pure domain data. The combined layer wrapper adds cross-domain
coordination (KB system, shared chrome, alignment status).
"""async def _handle_seg_split_result(
state_store: WorkflowStateStore, # The workflow state store
workflow_id: str, # The workflow identifier
request, # FastHTML request object
sess, # FastHTML session object
segment_index: int, # Index of segment to split
urls: SegmentationUrls, # URL bundle for segmentation routes
max_history_depth: int = DEFAULT_MAX_HISTORY_DEPTH, # Maximum history stack depth
) -> SegMutationResult: # Mutation result data
"Split a segment at the specified word position. Returns data, not OOB."async def _handle_seg_split(
state_store: WorkflowStateStore,
workflow_id: str,
request, sess,
segment_index: int,
urls: SegmentationUrls,
max_history_depth: int = DEFAULT_MAX_HISTORY_DEPTH,
): # OOB slot updates with stats, progress, focus, and toolbar
"Split a segment at the specified word position."def _build_merge_reject_flash(
prev_index:int, # Index of the segment above the boundary
curr_index:int, # Index of the segment below the boundary
) -> Div: # OOB div containing JS that flashes both boundary cards
"Build an OOB element that flashes both cards at a source boundary."def _handle_seg_merge_result(
state_store: WorkflowStateStore, # The workflow state store
workflow_id: str, # The workflow identifier
request, # FastHTML request object
sess, # FastHTML session object
segment_index: int, # Index of segment to merge (merges with previous)
urls: SegmentationUrls, # URL bundle for segmentation routes
max_history_depth: int = DEFAULT_MAX_HISTORY_DEPTH, # Maximum history stack depth
) -> SegMutationResult: # Mutation result data (extra_oob may contain merge rejection flash)
"Merge a segment with the previous segment. Returns data, not OOB."def _handle_seg_merge(
state_store: WorkflowStateStore,
workflow_id: str,
request, sess,
segment_index: int,
urls: SegmentationUrls,
max_history_depth: int = DEFAULT_MAX_HISTORY_DEPTH,
): # OOB slot updates with stats, progress, focus, and toolbar
"Merge a segment with the previous segment."def _handle_seg_undo_result(
state_store: WorkflowStateStore, # The workflow state store
workflow_id: str, # The workflow identifier
request, # FastHTML request object
sess, # FastHTML session object
urls: SegmentationUrls, # URL bundle for segmentation routes
) -> SegMutationResult: # Mutation result data
"Undo the last operation by restoring previous state from history. Returns data, not OOB."def _handle_seg_undo(
state_store: WorkflowStateStore,
workflow_id: str,
request, sess,
urls: SegmentationUrls,
): # OOB slot updates with stats, progress, focus, and toolbar
"Undo the last operation by restoring previous state from history."def _handle_seg_reset_result(
state_store: WorkflowStateStore, # The workflow state store
workflow_id: str, # The workflow identifier
request, # FastHTML request object
sess, # FastHTML session object
urls: SegmentationUrls, # URL bundle for segmentation routes
max_history_depth: int = DEFAULT_MAX_HISTORY_DEPTH, # Maximum history stack depth
) -> SegMutationResult: # Mutation result data
"Reset segments to the initial split result. Returns data, not OOB."def _handle_seg_reset(
state_store: WorkflowStateStore,
workflow_id: str,
request, sess,
urls: SegmentationUrls,
max_history_depth: int = DEFAULT_MAX_HISTORY_DEPTH,
): # OOB slot updates with stats, progress, focus, and toolbar
"Reset segments to the initial NLTK split result."async def _handle_seg_ai_split_result(
state_store: WorkflowStateStore, # The workflow state store
workflow_id: str, # The workflow identifier
segmentation_service: SegmentationService, # Service for NLTK sentence splitting
request, # FastHTML request object
sess, # FastHTML session object
urls: SegmentationUrls, # URL bundle for segmentation routes
max_history_depth: int = DEFAULT_MAX_HISTORY_DEPTH, # Maximum history stack depth
) -> SegMutationResult: # Mutation result data
"Re-run NLTK sentence splitting on all current text. Returns data, not OOB."async def _handle_seg_ai_split(
state_store: WorkflowStateStore,
workflow_id: str,
segmentation_service: SegmentationService,
request, sess,
urls: SegmentationUrls,
max_history_depth: int = DEFAULT_MAX_HISTORY_DEPTH,
): # OOB slot updates with stats, progress, focus, and toolbar
"Re-run NLTK sentence splitting on all current text."def init_workflow_router(
state_store: WorkflowStateStore, # The workflow state store
workflow_id: str, # The workflow identifier
source_service: SourceService, # Service for fetching source blocks
segmentation_service: SegmentationService, # Service for NLTK sentence splitting
prefix: str, # Route prefix (e.g., "/workflow/seg/workflow")
urls: SegmentationUrls, # URL bundle (populated after routes defined)
max_history_depth: int = DEFAULT_MAX_HISTORY_DEPTH, # Maximum history stack depth
handler_init: Callable = None, # Optional wrapped init handler
handler_split: Callable = None, # Optional wrapped split handler
handler_merge: Callable = None, # Optional wrapped merge handler
handler_undo: Callable = None, # Optional wrapped undo handler
handler_reset: Callable = None, # Optional wrapped reset handler
handler_ai_split: Callable = None, # Optional wrapped ai_split handler
) -> Tuple[APIRouter, Dict[str, Callable]]: # (router, route_dict)
"""
Initialize workflow routes for segmentation.
Accepts optional handler overrides for wrapping with cross-domain
coordination (e.g., KB system, shared chrome, alignment status).
"""
Classes
class SegInitResult(NamedTuple):
"""
Result from pure segmentation init handler.
Contains domain-specific data for the combined layer wrapper to use
when building cross-domain OOB elements (KB system, shared chrome).
"""
class SegMutationResult(NamedTuple):
"""
Result from a segmentation mutation handler (split, merge, undo, reset, NLTK split).
Contains data for the caller to build targeted OOB responses via
`build_mutation_response()`. The caller controls toolbar `extra_actions`
and any cross-domain OOB elements (alignment status, mini-stats).
"""
Variables
DEBUG_SEG_HANDLERS = True
helpers (helpers.ipynb)
Shared helper functions for the segmentation module
Import
from cjm_transcript_segmentation.components.helpers import *
Functions
def _get_segmentation_state(
ctx: InteractionContext # Interaction context with state
) -> SegmentationStepState: # Typed segmentation step state
"Get the full segmentation step state from context."def _get_segments(
ctx: InteractionContext # Interaction context with state
) -> List[TextSegment]: # List of TextSegment objects
"Get the list of segments from step state as TextSegment objects."def _is_initialized(
ctx: InteractionContext # Interaction context with state
) -> bool: # True if segments have been initialized
"Check if segments have been initialized."def _get_visible_count(
ctx: InteractionContext, # Interaction context with state
default: int = 3, # Default visible card count
) -> int: # Number of visible cards in viewport
"Get the stored visible card count."def _get_card_width(
ctx: InteractionContext, # Interaction context with state
default: int = 80, # Default card width in rem
) -> int: # Card stack width in rem
"Get the stored card stack width."def _get_is_auto_mode(
ctx: InteractionContext, # Interaction context with state
) -> bool: # Whether card count is in auto-adjust mode
"Get whether the card count is in auto-adjust mode."def _get_history(
ctx: InteractionContext # Interaction context with state
) -> List[List[Dict[str, Any]]]: # Stack of segment snapshots
"Get the undo history stack."def _get_focused_index(
ctx: InteractionContext # Interaction context with state
) -> int: # Currently focused segment index
"Get the currently focused segment index."
html_ids (html_ids.ipynb)
HTML ID constants for Phase 2 Left Column: Text Segmentation
Import
from cjm_transcript_segmentation.html_ids import (
SegmentationHtmlIds
)
Classes
class SegmentationHtmlIds:
"HTML ID constants for Phase 2 Left Column: Text Segmentation."
def as_selector(
id_str:str # The HTML ID to convert
) -> str: # CSS selector with # prefix
"Convert an ID to a CSS selector format."
def segment_card(
index:int # Segment index in the decomposition
) -> str: # HTML ID for the segment card
"Generate HTML ID for a segment card."
init (init.ipynb)
Router assembly for Phase 2 segmentation routes
Import
from cjm_transcript_segmentation.routes.init import (
WrappedHandlers,
init_segmentation_routers
)
Functions
def init_segmentation_routers(
state_store:WorkflowStateStore, # The workflow state store
workflow_id:str, # The workflow identifier
source_service:SourceService, # Service for fetching source blocks
segmentation_service:SegmentationService, # Service for NLTK sentence splitting
prefix:str, # Base prefix for segmentation routes (e.g., "/workflow/seg")
max_history_depth:int=DEFAULT_MAX_HISTORY_DEPTH, # Maximum history stack depth
wrapped_handlers:WrappedHandlers=None, # Dict with 'init', 'split', 'merge', 'undo', 'reset', 'ai_split' keys
) -> Tuple[List[APIRouter], SegmentationUrls, Dict[str, Callable]]: # (routers, urls, merged_routes)
"""
Initialize and return all segmentation routers with URL bundle.
The wrapped_handlers dict should contain handlers that already have
cross-domain concerns (KB system, alignment status) handled by the
combined layer's wrapper factories.
"""
keyboard_config (keyboard_config.ipynb)
Segmentation-specific keyboard actions, modes, and zone configuration
Import
from cjm_transcript_segmentation.components.keyboard_config import (
SD_SEG_ENTER_SPLIT_BTN,
SD_SEG_EXIT_SPLIT_BTN,
SD_SEG_SPLIT_BTN,
SD_SEG_MERGE_BTN,
SD_SEG_UNDO_BTN,
create_seg_kb_parts
)
Functions
def create_seg_kb_parts(
ids:CardStackHtmlIds, # Card stack HTML IDs
button_ids:CardStackButtonIds, # Card stack button IDs for navigation
config:CardStackConfig, # Card stack configuration
) -> Tuple[FocusZone, tuple, tuple]: # (zone, actions, modes)
"""
Create segmentation-specific keyboard building blocks.
Returns a zone, actions tuple, and modes tuple for assembly into a shared
ZoneManager by the combined-level keyboard config.
"""
Variables
SD_SEG_ENTER_SPLIT_BTN = 'sd-seg-enter-split-btn'
SD_SEG_EXIT_SPLIT_BTN = 'sd-seg-exit-split-btn'
SD_SEG_SPLIT_BTN = 'sd-seg-split-btn'
SD_SEG_MERGE_BTN = 'sd-seg-merge-btn'
SD_SEG_UNDO_BTN = 'sd-seg-undo-btn'
models (models.ipynb)
Data models and URL bundles for Phase 2 Left Column: Text Segmentation
Import
from cjm_transcript_segmentation.models import (
TextSegment,
SegmentationStepState,
SegmentationUrls
)
Classes
@dataclass
class TextSegment:
"A text segment during workflow processing before graph commit."
index: int # Sequence position (0-indexed)
text: str # Segment text content
source_id: Optional[str] # ID of source block
source_provider_id: Optional[str] # Source provider identifier
start_char: Optional[int] # Start character index in source
end_char: Optional[int] # End character index in source
def to_dict(self) -> Dict[str, Any]: # Dictionary representation
"""Convert to dictionary for JSON serialization."""
return asdict(self)
@classmethod
def from_dict(
cls,
data: Dict[str, Any] # Dictionary representation
) -> "TextSegment": # Reconstructed TextSegment
"Create from dictionary, filtering out legacy/unknown fields."
class SegmentationStepState(TypedDict):
"State for Phase 2 (left column): Text Segmentation."
@dataclass
class SegmentationUrls:
"URL bundle for Phase 2 segmentation route handlers and renderers."
card_stack: CardStackUrls = field(...)
split: str = '' # Execute split at word position
merge: str = '' # Merge segment with previous
enter_split: str = '' # Enter split mode for focused segment
exit_split: str = '' # Exit split mode
reset: str = '' # Reset to initial segments
ai_split: str = '' # AI (NLTK) re-split
undo: str = '' # Undo last operation
init: str = '' # Initialize segments from Phase 1
segment_card (segment_card.ipynb)
Segment card component with view and split modes
Import
from cjm_transcript_segmentation.components.segment_card import (
render_segment_card,
create_segment_card_renderer
)
Functions
def _render_card_metadata(
segment:TextSegment, # Segment to render metadata for
) -> Any: # Metadata component
"Render the left metadata column of a segment card."def _render_view_mode_content(
segment: TextSegment, # Segment to render
card_role: CardRole, # Role of this card in viewport
enter_split_url: str, # URL to enter split mode
) -> Any: # View mode content component
"Render the text content in view mode."def _render_split_mode_content(
segment:TextSegment, # Segment to render
caret_position:int, # Current caret position (token index)
split_url:str, # URL to execute split
exit_split_url:str, # URL to exit split mode
) -> Any: # Split mode content component
"Render the interactive token display in split mode."def _render_card_actions(
"Render hover-visible action buttons."def render_segment_card(
"Render a segment card with view or split mode content."def create_segment_card_renderer(
split_url: str = "", # URL to execute split
merge_url: str = "", # URL to merge with previous
enter_split_url: str = "", # URL to enter split mode
exit_split_url: str = "", # URL to exit split mode
is_split_mode: bool = False, # Whether split mode is active
caret_position: int = 0, # Caret position for split mode (word index)
source_boundaries: Set[int] = None, # Indices where source_id changes
) -> Callable: # Card renderer callback: (item, CardRenderContext) -> FT
"Create a card renderer callback for segment cards."
segmentation (segmentation.ipynb)
Segmentation service for text decomposition via NLTK plugin
Import
from cjm_transcript_segmentation.services.segmentation import (
SegmentationService,
split_segment_at_position,
merge_text_segments,
reindex_segments,
reconstruct_source_blocks
)
Functions
def split_segment_at_position(
segment: TextSegment, # Segment to split
char_position: int # Character position to split at (relative to segment text)
) -> tuple[TextSegment, TextSegment]: # Two new segments
"Split a segment into two at the given character position."def merge_text_segments(
first: TextSegment, # First segment (earlier in sequence)
second: TextSegment, # Second segment (later in sequence)
separator: str = " " # Text separator between segments
) -> TextSegment: # Merged segment
"Merge two adjacent segments into one."def reindex_segments(
segments: List[TextSegment] # List of segments to reindex
) -> List[TextSegment]: # Segments with corrected indices
"Reindex segments to have sequential indices starting from 0."def reconstruct_source_blocks(
segment_dicts: List[Dict[str, Any]], # Serialized working segments
) -> List[SourceBlock]: # Reconstructed source blocks with combined text
"Reconstruct source blocks by grouping segments by source_id and combining text."
Classes
class SegmentationService:
def __init__(
self,
plugin_manager: PluginManager, # Plugin manager for accessing text plugin
plugin_name: str = "cjm-text-plugin-nltk" # Name of the text processing plugin
)
"Service for text segmentation via NLTK plugin."
def __init__(
self,
plugin_manager: PluginManager, # Plugin manager for accessing text plugin
plugin_name: str = "cjm-text-plugin-nltk" # Name of the text processing plugin
)
"Initialize the segmentation service."
def is_available(self) -> bool: # True if plugin is loaded and ready
"""Check if the text processing plugin is available."""
return self._manager.get_plugin(self._plugin_name) is not None
def ensure_loaded(
self,
config: Optional[Dict[str, Any]] = None # Optional plugin configuration
) -> bool: # True if successfully loaded
"Ensure the text processing plugin is loaded."
async def split_sentences_async(
self,
text: str, # Text to split into sentences
source_id: Optional[str] = None, # Source block ID for traceability
source_provider_id: Optional[str] = None # Source provider identifier for traceability
) -> List[TextSegment]: # List of TextSegment objects
"Split text into sentences asynchronously."
def split_sentences(
self,
text: str, # Text to split into sentences
source_id: Optional[str] = None, # Source block ID for traceability
source_provider_id: Optional[str] = None # Source provider identifier for traceability
) -> List[TextSegment]: # List of TextSegment objects
"Split text into sentences synchronously."
async def split_combined_sources_async(
self,
source_blocks: List[SourceBlock] # Ordered list of source blocks
) -> List[TextSegment]: # Combined list of TextSegments with proper traceability
"Split multiple source blocks into segments with proper source tracking."
step_renderer (step_renderer.ipynb)
Composable renderers for the Phase 2 segmentation column and shared chrome
Import
from cjm_transcript_segmentation.components.step_renderer import (
render_toolbar,
render_seg_stats,
render_seg_source_position,
render_seg_column_body,
render_seg_footer_content,
render_seg_mini_stats_text
)
Functions
def render_toolbar(
reset_url: str, # URL for reset action
ai_split_url: str, # URL for NLTK split action
undo_url: str, # URL for undo action
can_undo: bool, # Whether undo is available
extra_actions: tuple = (), # Additional elements for the right action group
nltk_split_disabled: bool = False, # Whether NLTK Split button is disabled (current = NLTK pre-split)
oob: bool = False, # Whether to render as OOB swap
) -> Any: # Toolbar component
"Render the segmentation toolbar with action buttons."def render_seg_stats(
segments: List[TextSegment], # Current segments
oob: bool = False, # Whether to render as OOB swap
) -> Any: # Statistics component
"Render segmentation statistics."def render_seg_source_position(
segments: List[TextSegment], # Current segments
focused_index: int = 0, # Currently focused segment index
oob: bool = False, # Whether to render as OOB swap
) -> Any: # Source position indicator (empty if single source)
"Render source position indicator for the focused segment."def render_seg_column_body(
segments:List[TextSegment], # Segments to display
focused_index:int, # Currently focused segment index
visible_count:int, # Number of visible cards in viewport
card_width:int, # Card stack width in rem
urls:SegmentationUrls, # URL bundle for all segmentation routes
kb_system:Optional[Any]=None, # Rendered keyboard system (None when KB managed externally)
) -> Any: # Div with id=COLUMN_CONTENT containing viewport + infrastructure
"Render the segmentation column content area with card stack viewport."def render_seg_footer_content(
segments:List[TextSegment], # Current segments
focused_index:int, # Currently focused segment index
) -> Any: # Footer content with progress indicator, source position, and stats
"Render footer content with progress indicator, source position, and segment statistics."def render_seg_mini_stats_text(
segments:List[TextSegment], # Current segments
) -> str: # Compact stats string for column header badge
"Generate compact stats string for the segmentation column header badge."
utils (utils.ipynb)
Text processing utilities for segmentation: word counting, position mapping, and statistics
Import
from cjm_transcript_segmentation.utils import (
count_words,
word_index_to_char_position,
calculate_segment_stats,
get_source_boundaries,
get_source_count,
get_source_position
)
Functions
def count_words(
text: str # Text to count words in
) -> int: # Word count
"Count the number of whitespace-delimited words in text."def word_index_to_char_position(
text: str, # Full text
word_index: int # Word index (0-based, split happens before this word)
) -> int: # Character position for split
"Convert a word index to the character position where a split should occur."def calculate_segment_stats(
segments: List["TextSegment"] # List of segments to analyze
) -> Dict[str, Any]: # Statistics dictionary with total_words, total_segments
"Calculate aggregate statistics for a list of segments."def get_source_boundaries(
segments: List["TextSegment"], # Ordered list of segments
) -> Set[int]: # Indices where source_id changes from the previous segment
"""
Find indices where source_id changes between adjacent segments.
A boundary at index N means segment[N].source_id differs from
segment[N-1].source_id. Both must be non-None for a boundary to exist.
"""def get_source_count(
segments: List["TextSegment"], # Ordered list of segments
) -> int: # Number of unique non-None source_ids
"Count the number of unique audio sources in the segment list."def get_source_position(
segments: List["TextSegment"], # Ordered list of segments
focused_index: int, # Index of the focused segment
) -> Optional[int]: # 1-based position in ordered unique sources, or None
"""
Get the source position (1-based) of the focused segment.
Returns which source group the focused segment belongs to,
based on order of first appearance.
"""