# Forced Alignment Storage


<!-- WARNING: THIS FILE WAS AUTOGENERATED! DO NOT EDIT! -->

## ForcedAlignmentRow

A dataclass representing a single row in the standardized
`forced_alignments` table. It provides a type-safe way to work with
stored forced alignment results.

------------------------------------------------------------------------

<a
href="https://github.com/cj-mills/cjm-transcription-plugin-system/blob/main/cjm_transcription_plugin_system/forced_alignment_storage.py#L19"
target="_blank" style="float:right; font-size:smaller">source</a>

### ForcedAlignmentRow

``` python

def ForcedAlignmentRow(
    job_id:str, audio_path:str, audio_hash:str, text:str, text_hash:str, items:Optional=None, metadata:Optional=None,
    created_at:Optional=None
)->None:

```

*A single row from the forced_alignments table.*

``` python
# Test ForcedAlignmentRow creation
row = ForcedAlignmentRow(
    job_id="fa_job_abc123",
    audio_path="/tmp/test.mp3",
    audio_hash="sha256:" + "a" * 64,
    text="Hello world",
    text_hash="sha256:" + "b" * 64,
    items=[{"text": "Hello", "start_time": 0.0, "end_time": 0.5}],
    metadata={"model": "qwen3-forced-aligner"}
)

print(f"Row: job_id={row.job_id}, text={row.text}")
print(f"Audio hash: {row.audio_hash[:20]}...")
print(f"Text hash: {row.text_hash[:20]}...")
```

    Row: job_id=fa_job_abc123, text=Hello world
    Audio hash: sha256:aaaaaaaaaaaaa...
    Text hash: sha256:bbbbbbbbbbbbb...

## ForcedAlignmentStorage

`ForcedAlignmentStorage` is the standardized SQLite storage that all
forced alignment plugins should use. It defines the canonical schema for
the `forced_alignments` table, including content hash columns for
traceability.

**Schema:**

``` sql
CREATE TABLE IF NOT EXISTS forced_alignments (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    job_id TEXT UNIQUE NOT NULL,
    audio_path TEXT NOT NULL,
    audio_hash TEXT NOT NULL,
    text TEXT NOT NULL,
    text_hash TEXT NOT NULL,
    items JSON,
    metadata JSON,
    created_at REAL NOT NULL
);
```

The `audio_hash` and `text_hash` columns use the self-describing
`"algo:hexdigest"` format (e.g., `"sha256:a3f2b8..."`), enabling
downstream consumers to verify content integrity.

------------------------------------------------------------------------

<a
href="https://github.com/cj-mills/cjm-transcription-plugin-system/blob/main/cjm_transcription_plugin_system/forced_alignment_storage.py#L31"
target="_blank" style="float:right; font-size:smaller">source</a>

### ForcedAlignmentStorage

``` python

def ForcedAlignmentStorage(
    db_path:str, # Absolute path to the SQLite database file
):

```

*Standardized SQLite storage for forced alignment results.*

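## Testing

The cells below assume that `ForcedAlignmentStorage`, `hash_bytes`, and
the standard-library `sqlite3` module are already in scope; their imports
are not shown here.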

``` python
import tempfile
import os

# Create storage with temp database
tmp_db = tempfile.NamedTemporaryFile(suffix=".db", delete=False)
storage = ForcedAlignmentStorage(tmp_db.name)

print(f"Storage initialized at: {tmp_db.name}")
```

    Storage initialized at: /tmp/tmpwfon0jh4.db

``` python
# Save a forced alignment result with hashes
test_text = "November the 10th, Wednesday, 9 p.m."
text_hash = hash_bytes(test_text.encode())
audio_hash = "sha256:" + "e3b0c44298" * 6 + "e3b0"  # Simulated audio hash

test_items = [
    {"text": "November", "start_time": 1.04, "end_time": 1.6},
    {"text": "the", "start_time": 1.6, "end_time": 1.68},
    {"text": "10th", "start_time": 1.76, "end_time": 2.08},
    {"text": "Wednesday", "start_time": 2.48, "end_time": 3.04},
    {"text": "9", "start_time": 3.84, "end_time": 4.16},
    {"text": "pm", "start_time": 4.16, "end_time": 4.64},
]

storage.save(
    job_id="fa_test_001",
    audio_path="/tmp/test_audio.mp3",
    audio_hash=audio_hash,
    text=test_text,
    text_hash=text_hash,
    items=test_items,
    metadata={"model_id": "Qwen/Qwen3-ForcedAligner-0.6B", "language": "English", "word_count": 6}
)

print(f"Saved fa_test_001")
print(f"Text hash: {text_hash}")
```

    Saved fa_test_001
    Text hash: sha256:bb6827d460a3f8dff6d8b5704e237e54141853a0c58d1d7aefa49417ec6e49ad

``` python
# Retrieve by job ID
row = storage.get_by_job_id("fa_test_001")
assert row is not None
assert row.job_id == "fa_test_001"
assert row.text == test_text
assert row.text_hash == text_hash
assert row.audio_hash == audio_hash
assert row.items is not None
assert len(row.items) == 6
assert row.items[0]["text"] == "November"
assert row.items[0]["start_time"] == 1.04
assert row.metadata["model_id"] == "Qwen/Qwen3-ForcedAligner-0.6B"
assert row.created_at is not None

print(f"Retrieved: {row.job_id}")
print(f"Text: {row.text}")
print(f"Items: {len(row.items)} words")
print(f"First item: {row.items[0]}")
print(f"Created at: {row.created_at}")
```

    Retrieved: fa_test_001
    Text: November the 10th, Wednesday, 9 p.m.
    Items: 6 words
    First item: {'text': 'November', 'start_time': 1.04, 'end_time': 1.6}
    Created at: 1773970556.1129797

``` python
# Missing job returns None
missing = storage.get_by_job_id("nonexistent")
assert missing is None
print("get_by_job_id returns None for missing job: OK")
```

    get_by_job_id returns None for missing job: OK

``` python
# Save another and test list_jobs
storage.save(
    job_id="fa_test_002",
    audio_path="/tmp/test_audio_2.mp3",
    audio_hash="sha256:" + "f" * 64,
    text="Second alignment test.",
    text_hash=hash_bytes(b"Second alignment test."),
    items=[{"text": "Second", "start_time": 0.0, "end_time": 0.5}],
)

jobs = storage.list_jobs()
assert len(jobs) == 2
# Newest first
assert jobs[0].job_id == "fa_test_002"
assert jobs[1].job_id == "fa_test_001"

print(f"list_jobs returned {len(jobs)} rows (newest first): {[j.job_id for j in jobs]}")
```

    list_jobs returned 2 rows (newest first): ['fa_test_002', 'fa_test_001']

``` python
# Test text verification
assert storage.verify_text("fa_test_001") == True
print("verify_text with unchanged text: True")

# Tamper with text directly in DB
with sqlite3.connect(tmp_db.name) as con:
    con.execute("UPDATE forced_alignments SET text = 'TAMPERED' WHERE job_id = 'fa_test_001'")

assert storage.verify_text("fa_test_001") == False
print("verify_text after tampering: False")

# Missing job returns None
assert storage.verify_text("nonexistent") is None
print("verify_text for missing job: None")
```

    verify_text with unchanged text: True
    verify_text after tampering: False
    verify_text for missing job: None

``` python
# Cleanup
os.unlink(tmp_db.name)
print("Cleanup complete")
```

    Cleanup complete
