# Text Processing Plugin Interface


<!-- WARNING: THIS FILE WAS AUTOGENERATED! DO NOT EDIT! -->

------------------------------------------------------------------------

### TextProcessingPlugin

``` python

def TextProcessingPlugin(
    args:VAR_POSITIONAL, kwargs:VAR_KEYWORD
):

```

*Abstract base class for plugins that perform NLP operations.*

Extends PluginInterface with text processing requirements:

- `execute`: Dispatch method for different text operations
- `split_sentences`: Split text into sentence spans with character
  positions

## How It Works

    Host Process                              Worker Process (Isolated Env)
    ┌─────────────────────┐                  ┌─────────────────────────────┐
    │                     │                  │                             │
    │ plugin.execute(     │   HTTP/JSON      │  TextProcessingPlugin       │
    │   action="split_    │ ─────────────────▶    .execute(                │
    │     sentences",     │                  │       action="split_        │
    │   text="Hello..."   │                  │         sentences",         │
    │ )                   │                  │       text="Hello..."       │
    │                     │                  │    )                        │
    │                     │  ◀───────────────│                             │
    │ # Receives JSON     │   JSON response  │  # Returns TextProcessResult│
    │ # with spans        │                  │  # serialized to JSON       │
    └─────────────────────┘                  └─────────────────────────────┘

The `execute()` method acts as a dispatcher that routes to specific
operations like `split_sentences()`.

## Example Implementation

A minimal text processing plugin that demonstrates the interface:

``` python
import re
from typing import Any, Dict, List, Optional

from cjm_text_plugin_system.core import TextSpan, TextProcessResult

class ExampleTextPlugin(TextProcessingPlugin):
    """Example implementation showing how to create a text processing plugin.

    Sentences are found with a simple regular expression: each sentence is a
    run of characters closed by one of ``.``, ``!`` or ``?``. Any trailing
    text that has no closing terminator is reported as a final sentence
    instead of being dropped.
    """

    # First alternative: a run of non-terminators closed by one terminator.
    # Second alternative: the unterminated tail, which can only match once no
    # terminator is left in the remaining text.
    _SENTENCE_PATTERN = re.compile(r'[^.!?]*[.!?]|[^.!?]+')

    def __init__(self):
        # Configuration is empty until initialize() is called.
        self._config: Dict[str, Any] = {}

    @property
    def name(self) -> str:
        """Identifier of this plugin."""
        return "example-text-processor"

    @property
    def version(self) -> str:
        """Version string of this plugin."""
        return "1.0.0"

    def initialize(self, config: Optional[Dict[str, Any]] = None) -> None:
        """Store *config* as the current configuration.

        A missing (or otherwise falsy) config is replaced by an empty dict.
        """
        self._config = config or {}

    def execute(
        self,
        action: str = "split_sentences",
        **kwargs
    ) -> Dict[str, Any]:
        """Dispatch *action* to the matching text processing method.

        Args:
            action: Name of the operation; only ``"split_sentences"`` is
                supported.
            **kwargs: Forwarded unchanged to the selected method.

        Returns:
            A dict with ``"spans"`` (each span converted via ``to_dict()``)
            and ``"metadata"``.

        Raises:
            ValueError: If *action* is not a supported operation.
        """
        if action != "split_sentences":
            raise ValueError(f"Unknown action: {action}")

        result = self.split_sentences(**kwargs)
        return {
            "spans": [span.to_dict() for span in result.spans],
            "metadata": result.metadata
        }

    def split_sentences(
        self,
        text: str,
        **kwargs
    ) -> TextProcessResult:
        """Split *text* into sentence spans using a simple regex.

        Each span's ``text`` is the matched sentence with surrounding
        whitespace stripped, while ``start_char``/``end_char`` cover the raw
        match. The raw match may begin with the whitespace that follows the
        previous sentence, so callers mapping a span back onto *text* should
        strip the slice before comparing.

        Args:
            text: The text to split.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            A ``TextProcessResult`` holding the spans in document order plus
            metadata naming this processor and the method used.
        """
        spans: List[TextSpan] = []

        for match in self._SENTENCE_PATTERN.finditer(text):
            sentence = match.group().strip()
            if not sentence:
                # Whitespace-only matches (e.g. trailing spaces) carry no
                # sentence and are skipped.
                continue
            spans.append(TextSpan(
                text=sentence,
                start_char=match.start(),
                end_char=match.end(),
                label="sentence"
            ))

        return TextProcessResult(
            spans=spans,
            metadata={"processor": self.name, "method": "regex"}
        )

    def get_config_schema(self) -> Dict[str, Any]:
        """Return the JSON Schema for configuration (no properties defined)."""
        return {
            "type": "object",
            "properties": {}
        }

    def get_current_config(self) -> Dict[str, Any]:
        """Return the configuration dict stored by ``initialize``."""
        return self._config

    def cleanup(self) -> None:
        """Clean up resources; this example holds none, so it does nothing."""
        pass
```

``` python
# Test the example plugin: create an instance and initialize it with an
# empty configuration
plugin = ExampleTextPlugin()
plugin.initialize({})

# Show the plugin's identity and its configuration accessors
print(f"Plugin: {plugin.name} v{plugin.version}")
print(f"Config schema: {plugin.get_config_schema()}")
print(f"Current config: {plugin.get_current_config()}")
```

    Plugin: example-text-processor v1.0.0
    Config schema: {'type': 'object', 'properties': {}}
    Current config: {}

``` python
# Test split_sentences directly
text = "Hello world. How are you? I am fine!"
result = plugin.split_sentences(text)

# Summarize the input and what came back
print(f"\nInput: '{text}'")
print(f"Spans found: {len(result.spans)}")
print(f"Metadata: {result.metadata}")

for i, span in enumerate(result.spans):
    print(f"  {i}: '{span.text}' [{span.start_char}:{span.end_char}]")
    # Verify mapping back to original. The slice is stripped because the
    # offsets cover the raw regex match, which can start with the whitespace
    # after the previous sentence, whereas span.text is already stripped.
    assert text[span.start_char:span.end_char].strip() == span.text
```


    Input: 'Hello world. How are you? I am fine!'
    Spans found: 3
    Metadata: {'processor': 'example-text-processor', 'method': 'regex'}
      0: 'Hello world.' [0:12]
      1: 'How are you?' [12:25]
      2: 'I am fine!' [25:36]

``` python
# Test execute() dispatcher (as Worker would call it)
# execute() returns a plain dict with "spans" (a list of span dicts) and
# "metadata", rather than a TextProcessResult object
json_result = plugin.execute(action="split_sentences", text=text)

print(f"\nJSON result from execute():")
print(f"  spans: {len(json_result['spans'])} items")
print(f"  metadata: {json_result['metadata']}")

# List each span dict on its own line
for span_dict in json_result['spans']:
    print(f"    - {span_dict}")
```


    JSON result from execute():
      spans: 3 items
      metadata: {'processor': 'example-text-processor', 'method': 'regex'}
        - {'text': 'Hello world.', 'start_char': 0, 'end_char': 12, 'label': 'sentence', 'metadata': {}}
        - {'text': 'How are you?', 'start_char': 12, 'end_char': 25, 'label': 'sentence', 'metadata': {}}
        - {'text': 'I am fine!', 'start_char': 25, 'end_char': 36, 'label': 'sentence', 'metadata': {}}

``` python
# Cleanup: call the plugin's cleanup hook (a no-op for this example plugin)
plugin.cleanup()
```
