import re
from typing import Any, Dict, List, Optional

from cjm_text_plugin_system.core import TextSpan, TextProcessResult
class ExampleTextPlugin(TextProcessingPlugin):
    """Example implementation showing how to create a text processing plugin.

    The only supported operation is ``split_sentences``, a regex-based
    splitter that treats ``.``, ``!`` and ``?`` as sentence terminators.
    """

    # A sentence is a run of non-terminator characters together with every
    # terminator that directly follows it ("Wait..." and "what?!" stay whole),
    # or a bare run of terminators when the text starts with punctuation.
    # The terminators are optional so trailing text without final punctuation
    # is still returned, and the pattern never needs to backtrack.
    _SENTENCE_PATTERN = re.compile(r"[^.!?]+[.!?]*|[.!?]+")

    def __init__(self):
        """Create the plugin with an empty configuration."""
        self._config: Dict[str, Any] = {}

    @property
    def name(self) -> str:
        """Plugin identifier; also reported as ``processor`` in result metadata."""
        return "example-text-processor"

    @property
    def version(self) -> str:
        """Version string of this plugin."""
        return "1.0.0"

    def initialize(self, config: Optional[Dict[str, Any]] = None) -> None:
        """Initialize with configuration.

        Args:
            config: Configuration values; ``None`` or an empty mapping yields
                an empty configuration.
        """
        # Shallow-copy so later changes to the caller's dict cannot silently
        # alter the plugin's state.
        self._config = dict(config) if config else {}

    def execute(
        self,
        action: str = "split_sentences",
        **kwargs
    ) -> Dict[str, Any]:
        """Dispatch to the appropriate text processing method.

        Args:
            action: Operation to run; only ``"split_sentences"`` is supported.
            **kwargs: Forwarded unchanged to the selected operation.

        Returns:
            A dict with ``"spans"`` (each span converted via ``to_dict()``)
            and ``"metadata"`` taken from the operation's result.

        Raises:
            ValueError: If ``action`` is not a supported operation.
        """
        if action != "split_sentences":
            raise ValueError(f"Unknown action: {action}")

        result = self.split_sentences(**kwargs)
        return {
            "spans": [span.to_dict() for span in result.spans],
            "metadata": result.metadata,
        }

    def split_sentences(
        self,
        text: str,
        **kwargs
    ) -> TextProcessResult:
        """Split text into sentences using a simple regex.

        Each span's ``start_char``/``end_char`` delimit exactly the span's
        text, i.e. ``text[span.start_char:span.end_char] == span.text``;
        whitespace between sentences belongs to no span.

        Args:
            text: The text to split.
            **kwargs: Accepted for interface compatibility and ignored.

        Returns:
            A ``TextProcessResult`` whose spans are labelled ``"sentence"``
            and appear in document order.
        """
        spans: List[TextSpan] = []
        for match in self._SENTENCE_PATTERN.finditer(text):
            raw = match.group()
            sentence = raw.strip()
            if not sentence:
                # Whitespace-only chunk (e.g. trailing blanks): not a sentence.
                continue
            # Skip the whitespace the match carries in front of the sentence
            # so the offsets line up with the stripped text.
            start = match.start() + (len(raw) - len(raw.lstrip()))
            spans.append(TextSpan(
                text=sentence,
                start_char=start,
                end_char=start + len(sentence),
                label="sentence"
            ))
        return TextProcessResult(
            spans=spans,
            metadata={"processor": self.name, "method": "regex"}
        )

    def get_config_schema(self) -> Dict[str, Any]:
        """Return JSON Schema for configuration (an object with no properties)."""
        return {
            "type": "object",
            "properties": {}
        }

    def get_current_config(self) -> Dict[str, Any]:
        """Return a shallow copy of the current configuration."""
        # A copy keeps callers from mutating the plugin's state by accident.
        return dict(self._config)

    def cleanup(self) -> None:
        """Clean up resources (this example holds none, so nothing is done)."""
pass
Text Processing Plugin Interface
Domain-specific plugin interface for text processing operations
TextProcessingPlugin
class TextProcessingPlugin(*args, **kwargs)
Abstract base class for plugins that perform NLP operations.
Extends PluginInterface with text processing requirements:
- execute: Dispatch method for different text operations
- split_sentences: Split text into sentence spans with character positions
How It Works
Host Process                              Worker Process (Isolated Env)
┌─────────────────────┐                   ┌─────────────────────────────┐
│                     │                   │                             │
│  plugin.execute(    │     HTTP/JSON     │  TextProcessingPlugin       │
│    action="split_   │ ─────────────────▶│    .execute(                │
│      sentences",    │                   │      action="split_         │
│    text="Hello..."  │                   │        sentences",          │
│  )                  │                   │      text="Hello..."        │
│                     │                   │    )                        │
│                     │   ◀───────────────│                             │
│  # Receives JSON    │   JSON response   │  # Returns TextProcessResult│
│  # with spans       │                   │  # serialized to JSON       │
└─────────────────────┘                   └─────────────────────────────┘
The execute() method acts as a dispatcher that routes to specific operations like split_sentences().
Example Implementation
A minimal text processing plugin that demonstrates the interface:
# Instantiate the example plugin, initialize it with an empty config,
# then report its identity and configuration schema
plugin = ExampleTextPlugin()
plugin.initialize({})
config_schema = plugin.get_config_schema()
print(f"Plugin: {plugin.name} v{plugin.version}")
print(f"Config schema: {config_schema}")
print(f"Current config: {plugin.get_current_config()}")
Plugin: example-text-processor v1.0.0
Config schema: {'type': 'object', 'properties': {}}
Current config: {}
# Call split_sentences directly and inspect every span it returns
text = "Hello world. How are you? I am fine!"
result = plugin.split_sentences(text)

print(f"\nInput: '{text}'")
print(f"Spans found: {len(result.spans)}")
print(f"Metadata: {result.metadata}")

for i, span in enumerate(result.spans):
    begin, end = span.start_char, span.end_char
    print(f" {i}: '{span.text}' [{begin}:{end}]")
    # The offsets must lead back to the sentence in the original text;
    # strip() tolerates whitespace at the edges of the slice.
    recovered = text[begin:end].strip()
    assert recovered == span.text
Input: 'Hello world. How are you? I am fine!'
Spans found: 3
Metadata: {'processor': 'example-text-processor', 'method': 'regex'}
0: 'Hello world.' [0:12]
1: 'How are you?' [12:25]
2: 'I am fine!' [25:36]
# Go through the execute() dispatcher, the entry point a Worker would call,
# and show the JSON-friendly dict it hands back
json_result = plugin.execute(action="split_sentences", text=text)
span_dicts = json_result['spans']

print(f"\nJSON result from execute():")
print(f" spans: {len(span_dicts)} items")
print(f" metadata: {json_result['metadata']}")
for span_dict in span_dicts:
    print(f" - {span_dict}")
JSON result from execute():
spans: 3 items
metadata: {'processor': 'example-text-processor', 'method': 'regex'}
- {'text': 'Hello world.', 'start_char': 0, 'end_char': 12, 'label': 'sentence', 'metadata': {}}
- {'text': 'How are you?', 'start_char': 12, 'end_char': 25, 'label': 'sentence', 'metadata': {}}
- {'text': 'I am fine!', 'start_char': 25, 'end_char': 36, 'label': 'sentence', 'metadata': {}}
# Cleanup: let the plugin release anything it holds now that the tests are done
plugin.cleanup()