Content Hashing Utilities

Shared cryptographic hashing primitives for content integrity verification

hash_bytes

Computes a cryptographic hash of byte content and returns a self-describing string in "algo:hexdigest" format. Because the algorithm name is embedded in the string itself, a stored hash can still be checked with the algorithm that produced it even if the default algorithm changes later.

source

hash_bytes


def hash_bytes(
    content:bytes, # Byte content to hash
    algo:str='sha256', # Hash algorithm name (e.g., "sha256", "sha3_256")
)->str: # Hash string in "algo:hexdigest" format

Compute a hash of byte content.

# Hash a fixed payload and show the self-describing result
payload = b"hello world"
result = hash_bytes(payload)
print(f"hash_bytes result: {result}")

# Format check: "<algo>:<hexdigest>" with SHA-256 as the default algorithm
algo, digest = result.split(":", 1)
assert (algo, len(digest)) == ("sha256", 64)  # 256 bits -> 64 hex chars

# Determinism: hashing the same bytes twice yields the same string
assert hash_bytes(payload) == hash_bytes(payload)

# Sensitivity: changing a single character changes the hash
assert hash_bytes(payload) != hash_bytes(b"hello World")

print("hash_bytes tests passed")

hash_bytes result: sha256:b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9
hash_bytes tests passed

# Select a non-default algorithm through the `algo` argument
sha512_result = hash_bytes(b"test", algo="sha512")
print(f"SHA-512 result: {sha512_result[:30]}...")

# The prefix names the algorithm that was requested
sha512_prefix = "sha512:"
assert sha512_result.startswith(sha512_prefix)

# SHA-512 is 512 bits, i.e. 128 hex characters after the prefix
sha512_digest = sha512_result.split(":")[1]
assert len(sha512_digest) == 128

print("Custom algorithm test passed")

SHA-512 result: sha512:ee26b0dd4af7e749aa1a8ee...
Custom algorithm test passed

hash_file

Stream-hashes a file without loading it entirely into memory. Uses chunked reads suitable for large files (audio, video, etc.).

source

hash_file


def hash_file(
    path:Union[str, Path], # Path to file to hash
    algo:str='sha256', # Hash algorithm name
    chunk_size:int=8192, # Read chunk size in bytes
)->str: # Hash string in "algo:hexdigest" format

Stream-hash a file without loading it entirely into memory.

import os
import tempfile
from pathlib import Path

# Create a temp file with known content. delete=False keeps the file on disk
# after the `with` block closes it, so it can be hashed by path afterwards.
with tempfile.NamedTemporaryFile(delete=False, mode='wb') as tmp:
    tmp.write(b"hello world")
    tmp_path = tmp.name

# Run the checks inside try/finally so the temp file is removed even when
# an assertion fails or hashing raises.
try:
    # Hash the file
    file_hash = hash_file(tmp_path)
    print(f"hash_file result: {file_hash}")

    # Should match hash_bytes of the same content
    assert file_hash == hash_bytes(b"hello world")

    # A Path object must give the same result as the equivalent str path
    assert hash_file(Path(tmp_path)) == file_hash
finally:
    # Cleanup
    os.unlink(tmp_path)

print("hash_file tests passed")

hash_file result: sha256:b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9
hash_file tests passed

verify_hash

Verifies byte content against an expected hash string. Automatically extracts the algorithm from the "algo:hexdigest" format.

source

verify_hash


def verify_hash(
    content:bytes, # Content to verify
    expected:str, # Expected hash in "algo:hexdigest" format
)->bool: # True if content matches expected hash

Verify content against an expected hash string.

original = b"hello world"
h = hash_bytes(original)

# Matching content verifies
assert verify_hash(original, h)

# Modified content is rejected
assert not verify_hash(b"hello World", h)

# The algorithm is read from the hash string, so non-default algorithms work
h_sha512 = hash_bytes(original, algo="sha512")
assert verify_hash(original, h_sha512)
assert not verify_hash(b"tampered", h_sha512)

print("verify_hash tests passed")

verify_hash tests passed