Working on a version for easing the inclusion of new analyzers

a4bdb8ff · Arturo Montejo Ráez · dd4c4c96 · a4bdb8ff · a4bdb8ff · a4bdb8ff
Commit a4bdb8ff authored Oct 29, 2025 by Arturo Montejo Ráez
Showing with 133 additions and 0 deletions
README.md
textflow/EmojiAnalyzer.py
textflow/PolarityAnalyzer.py
textflow/SimpleAnalyzer.py
textflow/__init__.py
textflow/registry.py
--- a/README.md
+++ b/README.md
@@ -724,3 +724,35 @@ The steps to create an analyzer are:
+## Quick path for adding a new analyzer
+If your analyzer just needs to process a list of inputs (e.g., texts from metadata) and return one result per input, you can extend `SimpleAnalyzer` and optionally register it for easy instantiation:
+1) Extend `textflow.SimpleAnalyzer.SimpleAnalyzer` and implement `compute(items)`.
+2) Optionally set `apply_to_metadata = False` if you want to analyze child sequences rather than metadata items.
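+3) (Optional) Register with a name using the decorator `@textflow.register_analyzer("my_analyzer")`, then instantiate with `textflow.create_analyzer("my_analyzer", **kwargs)`. Registration happens when the module that defines the class is imported, so import that module before calling `create_analyzer`.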
+Minimal example:
+```python
+from textflow.SimpleAnalyzer import SimpleAnalyzer
+from textflow import register_analyzer
+@register_analyzer("word_count")
+class WordCountAnalyzer(SimpleAnalyzer):
+    apply_to_metadata = True  # expects list of texts from metadata
+    def compute(self, items):
+        return [{"words": len(text.split())} for text in items]
+```
+Usage:
+```python
+import textflow
+an = textflow.create_analyzer("word_count")
+an.analyze(sequence, tag="volumetry", levelOfAnalyzer="text")
+```
--- a/textflow/EmojiAnalyzer.py
+++ b/textflow/EmojiAnalyzer.py
 from typing import Optional
 import emoji
 from textflow.Analyzer import Analyzer
+from textflow.registry import register_analyzer
 import re
+@register_analyzer("emoji")
 class EmojiAnalyzer(Analyzer):
    """
    A class that provides methods to analyze the different emojis of the text of a sequence.

--- a/textflow/PolarityAnalyzer.py
+++ b/textflow/PolarityAnalyzer.py
 from typing import Optional
 from textflow.Analyzer import Analyzer
+from textflow.registry import register_analyzer
 from transformers import pipeline
+@register_analyzer("polarity")
 class PolarityAnalyzer(Analyzer):
    """
    A class that provides methods to analyze the polarity of the text of a sequence.

--- a/textflow/SimpleAnalyzer.py
+++ b/textflow/SimpleAnalyzer.py
+from abc import ABC, abstractmethod
+from typing import Any, Iterable, List, Optional
+from textflow.Analyzer import Analyzer
+class SimpleAnalyzer(Analyzer, ABC):
+    """
+    Base class for analyzers that only map a list of inputs to a list of results.
+    A subclass provides `compute(items)`; `analyze` hands that method to
+    `Analyzer.analyze` together with the `apply_to_metadata` flag, which
+    selects what `items` contains:
+    - True (default): a list of metadata values (e.g., texts) collected
+      via `Sequence.filterMetadata`.
+    - False: a list of child sequences (via `Sequence.filter`).
+    """
+    # Class-level switch read by `analyze`; subclasses override it to change the input source.
+    apply_to_metadata: bool = True
+    @abstractmethod
+    def compute(self, items: List[Any]) -> List[Any]:  # pragma: no cover
+        """Produce the list of results, one entry for each element of `items`."""
+        raise NotImplementedError
+    def analyze(
+        self,
+        sequence,
+        tag,
+        levelOfAnalyzer,
+        levelOfResult: Optional[str] = "",
+    ):
+        """
+        Run `compute` over `sequence` by delegating to `Analyzer.analyze`.
+        `sequence`, `tag`, `levelOfAnalyzer` and `levelOfResult` are forwarded
+        unchanged; `self.compute` is passed as the analysis function and
+        `self.apply_to_metadata` as the last positional argument. This method
+        has no return statement of its own.
+        """
+        super().analyze(
+            self.compute,
+            sequence,
+            tag,
+            levelOfAnalyzer,
+            levelOfResult,
+            self.apply_to_metadata,
+        )
--- a/textflow/__init__.py
+++ b/textflow/__init__.py
 __version__ = '0.1.0'
+from textflow.registry import (
+    register_analyzer,
+    get_analyzer_class,
+    create_analyzer,
+    available_analyzers,
+)
+# Public names exported by the package: the version string plus the registry
+# helpers imported from textflow.registry.
+__all__ = [
+    "__version__",
+    "register_analyzer",
+    "get_analyzer_class",
+    "create_analyzer",
+    "available_analyzers",
+]
--- a/textflow/registry.py
+++ b/textflow/registry.py
+from typing import Callable, Dict, Iterable, List, Optional, Type, TypeVar
+from textflow.Analyzer import Analyzer
+# Type variable bound to Analyzer, used so the decorator's return annotation
+# matches the class type it receives.
+T = TypeVar("T", bound=Analyzer)
+# Module-level registry mapping lower-cased analyzer names to analyzer classes.
+ANALYZER_REGISTRY: Dict[str, Type[Analyzer]] = {}
+def register_analyzer(name: str) -> Callable[[Type[T]], Type[T]]:
+    """
+    Class decorator to register an analyzer by name.
+    The name is lower-cased before it is stored, so lookups are
+    case-insensitive. Registering a name that is already bound to a
+    different class still replaces the previous entry, but now emits a
+    `RuntimeWarning` so accidental name clashes do not go unnoticed.
+    Args:
+        name: Name under which the decorated class is registered.
+    Returns:
+        A decorator that stores the class in `ANALYZER_REGISTRY` and
+        returns the class unchanged.
+    Usage:
+        @register_analyzer("polarity")
+        class PolarityAnalyzer(Analyzer):
+            ...
+    """
+    import warnings  # local import: not part of this module's import header
+    def _decorator(cls: Type[T]) -> Type[T]:
+        key = name.lower()
+        previous = ANALYZER_REGISTRY.get(key)
+        # Re-registering the same class (e.g. after a module reload) is
+        # harmless; only a genuinely different class deserves a warning.
+        if previous is not None and (
+            (previous.__module__, previous.__qualname__)
+            != (cls.__module__, cls.__qualname__)
+        ):
+            warnings.warn(
+                f"Analyzer name '{key}' was registered to "
+                f"{previous.__module__}.{previous.__qualname__} and is now "
+                f"replaced by {cls.__module__}.{cls.__qualname__}",
+                RuntimeWarning,
+                stacklevel=2,  # point at the decorated class, not this helper
+            )
+        ANALYZER_REGISTRY[key] = cls
+        return cls
+    return _decorator
+def get_analyzer_class(name: str) -> Type[Analyzer]:
+    """
+    Return the analyzer class registered under `name`.
+    The lookup is case-insensitive because names are lower-cased both on
+    registration and here.
+    Args:
+        name: Registered analyzer name.
+    Returns:
+        The class stored in `ANALYZER_REGISTRY` for that name.
+    Raises:
+        KeyError: If no analyzer is registered under that name. The message
+            lists the registered names and reminds the caller that classes
+            are registered only when their defining module is imported.
+    """
+    key = name.lower()
+    try:
+        return ANALYZER_REGISTRY[key]
+    except KeyError:
+        # An empty registry would otherwise leave a dangling "Available: ".
+        available = ", ".join(sorted(ANALYZER_REGISTRY)) or "(none)"
+        # `from None` hides the bare dictionary KeyError so only the
+        # descriptive error below is shown.
+        raise KeyError(
+            f"Analyzer '{name}' not found. Available: {available}. "
+            "Analyzers are registered when the module that defines them is "
+            "imported; import that module first."
+        ) from None
+def create_analyzer(name: str, **kwargs) -> Analyzer:
+    """
+    Build an instance of the analyzer registered under `name`.
+    Args:
+        name: Registered analyzer name, resolved via `get_analyzer_class`.
+        **kwargs: Forwarded unchanged to the analyzer class constructor.
+    Returns:
+        The newly created analyzer instance.
+    """
+    analyzer_cls = get_analyzer_class(name)
+    instance = analyzer_cls(**kwargs)  # type: ignore[call-arg]
+    return instance
+def available_analyzers() -> List[str]:
+    """Return the names currently in `ANALYZER_REGISTRY`, sorted."""
+    names = list(ANALYZER_REGISTRY)
+    names.sort()
+    return names