Implementing some analyzers

parent f3e06440
......@@ -60,7 +60,7 @@ unicode_backport = ["unicodedata2"]
[[package]]
name = "click"
version = "8.1.2"
version = "8.1.3"
description = "Composable command line interface toolkit"
category = "main"
optional = false
......@@ -87,7 +87,7 @@ python-versions = "*"
[[package]]
name = "filelock"
version = "3.6.0"
version = "3.7.0"
description = "A platform independent file lock."
category = "main"
optional = false
......@@ -98,16 +98,8 @@ docs = ["furo (>=2021.8.17b43)", "sphinx (>=4.1)", "sphinx-autodoc-typehints (>=
testing = ["covdefaults (>=1.2.0)", "coverage (>=4)", "pytest (>=4)", "pytest-cov", "pytest-timeout (>=1.4.2)"]
[[package]]
name = "functools"
version = "0.5"
description = "Fast tools for functional programming"
category = "main"
optional = false
python-versions = "*"
[[package]]
name = "huggingface-hub"
version = "0.5.1"
version = "0.6.0"
description = "Client library to download and publish models on the huggingface.co hub"
category = "main"
optional = false
......@@ -124,6 +116,7 @@ typing-extensions = ">=3.7.4.3"
[package.extras]
all = ["pytest", "datasets", "soundfile", "black (>=22.0,<23.0)", "isort (>=5.5.4)", "flake8 (>=3.8.3)"]
dev = ["pytest", "datasets", "soundfile", "black (>=22.0,<23.0)", "isort (>=5.5.4)", "flake8 (>=3.8.3)"]
fastai = ["toml", "fastai (>=2.4)", "fastcore (>=1.3.27)"]
quality = ["black (>=22.0,<23.0)", "isort (>=5.5.4)", "flake8 (>=3.8.3)"]
tensorflow = ["tensorflow", "pydot", "graphviz"]
testing = ["pytest", "datasets", "soundfile"]
......@@ -180,7 +173,7 @@ python-versions = ">=3.7"
[[package]]
name = "more-itertools"
version = "8.12.0"
version = "8.13.0"
description = "More routines for operating on iterables, beyond itertools"
category = "dev"
optional = false
......@@ -301,14 +294,14 @@ email = ["email-validator (>=1.0.3)"]
[[package]]
name = "pyparsing"
version = "3.0.7"
description = "Python parsing module"
version = "3.0.9"
description = "pyparsing module - Classes and methods to define and execute parsing grammars"
category = "main"
optional = false
python-versions = ">=3.6"
python-versions = ">=3.6.8"
[package.extras]
diagrams = ["jinja2", "railroad-diagrams"]
diagrams = ["railroad-diagrams", "jinja2"]
[[package]]
name = "pytest"
......@@ -367,29 +360,6 @@ socks = ["PySocks (>=1.5.6,!=1.5.7)", "win-inet-pton"]
use_chardet_on_py3 = ["chardet (>=3.0.2,<5)"]
[[package]]
name = "sacremoses"
version = "0.0.53"
description = "SacreMoses"
category = "main"
optional = false
python-versions = "*"
[package.dependencies]
click = "*"
joblib = "*"
regex = "*"
six = "*"
tqdm = "*"
[[package]]
name = "six"
version = "1.16.0"
description = "Python 2 and 3 compatibility utilities"
category = "main"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*"
[[package]]
name = "smart-open"
version = "5.2.1"
description = "Utils for streaming large files (S3, HDFS, GCS, Azure Blob Storage, gzip, bz2...)"
......@@ -540,6 +510,17 @@ docs = ["sphinx", "sphinx-rtd-theme", "setuptools-rust"]
testing = ["pytest", "requests", "numpy", "datasets"]
[[package]]
name = "torch"
version = "1.11.0"
description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration"
category = "main"
optional = false
python-versions = ">=3.7.0"
[package.dependencies]
typing-extensions = "*"
[[package]]
name = "tqdm"
version = "4.64.0"
description = "Fast, Extensible Progress Meter"
......@@ -558,11 +539,11 @@ telegram = ["requests"]
[[package]]
name = "transformers"
version = "4.18.0"
description = "State-of-the-art Natural Language Processing for TensorFlow 2.0 and PyTorch"
version = "4.19.1"
description = "State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow"
category = "main"
optional = false
python-versions = ">=3.6.0"
python-versions = ">=3.7.0"
[package.dependencies]
filelock = "*"
......@@ -572,22 +553,22 @@ packaging = ">=20.0"
pyyaml = ">=5.1"
regex = "!=2019.12.17"
requests = "*"
sacremoses = "*"
tokenizers = ">=0.11.1,<0.11.3 || >0.11.3,<0.13"
tqdm = ">=4.27"
[package.extras]
all = ["tensorflow (>=2.3)", "onnxconverter-common", "tf2onnx", "torch (>=1.0)", "jax (>=0.2.8,!=0.3.2)", "jaxlib (>=0.1.65)", "flax (>=0.3.5)", "optax (>=0.0.8)", "sentencepiece (>=0.1.91,!=0.1.92)", "protobuf", "tokenizers (>=0.11.1,!=0.11.3,<0.13)", "torchaudio", "librosa", "pyctcdecode (>=0.3.0)", "phonemizer", "pillow", "optuna", "ray", "sigopt", "timm", "codecarbon (==1.2.0)"]
all = ["tensorflow (>=2.3)", "onnxconverter-common", "tf2onnx", "torch (>=1.0)", "jax (>=0.2.8,!=0.3.2,<=0.3.6)", "jaxlib (>=0.1.65,<=0.3.6)", "flax (>=0.3.5)", "optax (>=0.0.8)", "sentencepiece (>=0.1.91,!=0.1.92)", "protobuf", "tokenizers (>=0.11.1,!=0.11.3,<0.13)", "torchaudio", "librosa", "pyctcdecode (>=0.3.0)", "phonemizer", "pillow", "optuna", "ray", "sigopt", "timm", "codecarbon (==1.2.0)"]
audio = ["librosa", "pyctcdecode (>=0.3.0)", "phonemizer"]
codecarbon = ["codecarbon (==1.2.0)"]
deepspeed = ["deepspeed (>=0.6.0)"]
dev = ["tensorflow (>=2.3)", "onnxconverter-common", "tf2onnx", "torch (>=1.0)", "jax (>=0.2.8,!=0.3.2)", "jaxlib (>=0.1.65)", "flax (>=0.3.5)", "optax (>=0.0.8)", "sentencepiece (>=0.1.91,!=0.1.92)", "protobuf", "tokenizers (>=0.11.1,!=0.11.3,<0.13)", "torchaudio", "librosa", "pyctcdecode (>=0.3.0)", "phonemizer", "pillow", "optuna", "ray", "sigopt", "timm", "codecarbon (==1.2.0)", "pytest", "pytest-xdist", "timeout-decorator", "parameterized", "psutil", "datasets", "pytest-timeout", "black (>=22.0,<23.0)", "sacrebleu (>=1.4.12,<2.0.0)", "rouge-score", "nltk", "GitPython (<3.1.19)", "hf-doc-builder (>=0.2.0)", "faiss-cpu", "cookiecutter (==1.7.3)", "isort (>=5.5.4)", "flake8 (>=3.8.3)", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "unidic-lite (>=1.0.7)", "unidic (>=1.0.2)", "hf-doc-builder", "scikit-learn"]
dev-tensorflow = ["pytest", "pytest-xdist", "timeout-decorator", "parameterized", "psutil", "datasets", "pytest-timeout", "black (>=22.0,<23.0)", "sacrebleu (>=1.4.12,<2.0.0)", "rouge-score", "nltk", "GitPython (<3.1.19)", "hf-doc-builder (>=0.2.0)", "faiss-cpu", "cookiecutter (==1.7.3)", "tensorflow (>=2.3)", "onnxconverter-common", "tf2onnx", "sentencepiece (>=0.1.91,!=0.1.92)", "protobuf", "tokenizers (>=0.11.1,!=0.11.3,<0.13)", "pillow", "isort (>=5.5.4)", "flake8 (>=3.8.3)", "hf-doc-builder", "scikit-learn", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "librosa", "pyctcdecode (>=0.3.0)", "phonemizer"]
dev-torch = ["pytest", "pytest-xdist", "timeout-decorator", "parameterized", "psutil", "datasets", "pytest-timeout", "black (>=22.0,<23.0)", "sacrebleu (>=1.4.12,<2.0.0)", "rouge-score", "nltk", "GitPython (<3.1.19)", "hf-doc-builder (>=0.2.0)", "faiss-cpu", "cookiecutter (==1.7.3)", "torch (>=1.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "protobuf", "tokenizers (>=0.11.1,!=0.11.3,<0.13)", "torchaudio", "librosa", "pyctcdecode (>=0.3.0)", "phonemizer", "pillow", "optuna", "ray", "sigopt", "timm", "codecarbon (==1.2.0)", "isort (>=5.5.4)", "flake8 (>=3.8.3)", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "unidic-lite (>=1.0.7)", "unidic (>=1.0.2)", "hf-doc-builder", "scikit-learn", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)"]
docs = ["tensorflow (>=2.3)", "onnxconverter-common", "tf2onnx", "torch (>=1.0)", "jax (>=0.2.8,!=0.3.2)", "jaxlib (>=0.1.65)", "flax (>=0.3.5)", "optax (>=0.0.8)", "sentencepiece (>=0.1.91,!=0.1.92)", "protobuf", "tokenizers (>=0.11.1,!=0.11.3,<0.13)", "torchaudio", "librosa", "pyctcdecode (>=0.3.0)", "phonemizer", "pillow", "optuna", "ray", "sigopt", "timm", "codecarbon (==1.2.0)", "hf-doc-builder"]
deepspeed = ["deepspeed (>=0.6.4)"]
deepspeed-testing = ["deepspeed (>=0.6.4)", "pytest", "pytest-xdist", "timeout-decorator", "parameterized", "psutil", "datasets", "pytest-timeout", "black (>=22.0,<23.0)", "sacrebleu (>=1.4.12,<2.0.0)", "rouge-score", "nltk", "GitPython (<3.1.19)", "hf-doc-builder (>=0.3.0)", "sacremoses", "rjieba", "faiss-cpu", "cookiecutter (==1.7.3)", "optuna"]
dev = ["tensorflow (>=2.3)", "onnxconverter-common", "tf2onnx", "torch (>=1.0)", "jax (>=0.2.8,!=0.3.2,<=0.3.6)", "jaxlib (>=0.1.65,<=0.3.6)", "flax (>=0.3.5)", "optax (>=0.0.8)", "sentencepiece (>=0.1.91,!=0.1.92)", "protobuf", "tokenizers (>=0.11.1,!=0.11.3,<0.13)", "torchaudio", "librosa", "pyctcdecode (>=0.3.0)", "phonemizer", "pillow", "optuna", "ray", "sigopt", "timm", "codecarbon (==1.2.0)", "pytest", "pytest-xdist", "timeout-decorator", "parameterized", "psutil", "datasets", "pytest-timeout", "black (>=22.0,<23.0)", "sacrebleu (>=1.4.12,<2.0.0)", "rouge-score", "nltk", "GitPython (<3.1.19)", "hf-doc-builder (>=0.3.0)", "sacremoses", "rjieba", "faiss-cpu", "cookiecutter (==1.7.3)", "isort (>=5.5.4)", "flake8 (>=3.8.3)", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "unidic-lite (>=1.0.7)", "unidic (>=1.0.2)", "hf-doc-builder", "scikit-learn"]
dev-tensorflow = ["pytest", "pytest-xdist", "timeout-decorator", "parameterized", "psutil", "datasets", "pytest-timeout", "black (>=22.0,<23.0)", "sacrebleu (>=1.4.12,<2.0.0)", "rouge-score", "nltk", "GitPython (<3.1.19)", "hf-doc-builder (>=0.3.0)", "sacremoses", "rjieba", "faiss-cpu", "cookiecutter (==1.7.3)", "tensorflow (>=2.3)", "onnxconverter-common", "tf2onnx", "sentencepiece (>=0.1.91,!=0.1.92)", "protobuf", "tokenizers (>=0.11.1,!=0.11.3,<0.13)", "pillow", "isort (>=5.5.4)", "flake8 (>=3.8.3)", "hf-doc-builder", "scikit-learn", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "librosa", "pyctcdecode (>=0.3.0)", "phonemizer"]
dev-torch = ["pytest", "pytest-xdist", "timeout-decorator", "parameterized", "psutil", "datasets", "pytest-timeout", "black (>=22.0,<23.0)", "sacrebleu (>=1.4.12,<2.0.0)", "rouge-score", "nltk", "GitPython (<3.1.19)", "hf-doc-builder (>=0.3.0)", "sacremoses", "rjieba", "faiss-cpu", "cookiecutter (==1.7.3)", "torch (>=1.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "protobuf", "tokenizers (>=0.11.1,!=0.11.3,<0.13)", "torchaudio", "librosa", "pyctcdecode (>=0.3.0)", "phonemizer", "pillow", "optuna", "ray", "sigopt", "timm", "codecarbon (==1.2.0)", "isort (>=5.5.4)", "flake8 (>=3.8.3)", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "unidic-lite (>=1.0.7)", "unidic (>=1.0.2)", "hf-doc-builder", "scikit-learn", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)"]
docs = ["tensorflow (>=2.3)", "onnxconverter-common", "tf2onnx", "torch (>=1.0)", "jax (>=0.2.8,!=0.3.2,<=0.3.6)", "jaxlib (>=0.1.65,<=0.3.6)", "flax (>=0.3.5)", "optax (>=0.0.8)", "sentencepiece (>=0.1.91,!=0.1.92)", "protobuf", "tokenizers (>=0.11.1,!=0.11.3,<0.13)", "torchaudio", "librosa", "pyctcdecode (>=0.3.0)", "phonemizer", "pillow", "optuna", "ray", "sigopt", "timm", "codecarbon (==1.2.0)", "hf-doc-builder"]
docs_specific = ["hf-doc-builder"]
fairscale = ["fairscale (>0.3)"]
flax = ["jax (>=0.2.8,!=0.3.2)", "jaxlib (>=0.1.65)", "flax (>=0.3.5)", "optax (>=0.0.8)"]
flax = ["jax (>=0.2.8,!=0.3.2,<=0.3.6)", "jaxlib (>=0.1.65,<=0.3.6)", "flax (>=0.3.5)", "optax (>=0.0.8)"]
flax-speech = ["librosa", "pyctcdecode (>=0.3.0)", "phonemizer"]
ftfy = ["ftfy"]
integrations = ["optuna", "ray", "sigopt"]
......@@ -596,7 +577,7 @@ modelcreation = ["cookiecutter (==1.7.3)"]
onnx = ["onnxconverter-common", "tf2onnx", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)"]
onnxruntime = ["onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)"]
optuna = ["optuna"]
quality = ["black (>=22.0,<23.0)", "isort (>=5.5.4)", "flake8 (>=3.8.3)", "GitPython (<3.1.19)", "hf-doc-builder (>=0.2.0)"]
quality = ["black (>=22.0,<23.0)", "isort (>=5.5.4)", "flake8 (>=3.8.3)", "GitPython (<3.1.19)", "hf-doc-builder (>=0.3.0)"]
ray = ["ray"]
retrieval = ["faiss-cpu", "datasets"]
sagemaker = ["sagemaker (>=2.31.0)"]
......@@ -605,7 +586,7 @@ serving = ["pydantic", "uvicorn", "fastapi", "starlette"]
sigopt = ["sigopt"]
sklearn = ["scikit-learn"]
speech = ["torchaudio", "librosa", "pyctcdecode (>=0.3.0)", "phonemizer"]
testing = ["pytest", "pytest-xdist", "timeout-decorator", "parameterized", "psutil", "datasets", "pytest-timeout", "black (>=22.0,<23.0)", "sacrebleu (>=1.4.12,<2.0.0)", "rouge-score", "nltk", "GitPython (<3.1.19)", "hf-doc-builder (>=0.2.0)", "faiss-cpu", "cookiecutter (==1.7.3)"]
testing = ["pytest", "pytest-xdist", "timeout-decorator", "parameterized", "psutil", "datasets", "pytest-timeout", "black (>=22.0,<23.0)", "sacrebleu (>=1.4.12,<2.0.0)", "rouge-score", "nltk", "GitPython (<3.1.19)", "hf-doc-builder (>=0.3.0)", "sacremoses", "rjieba", "faiss-cpu", "cookiecutter (==1.7.3)"]
tf = ["tensorflow (>=2.3)", "onnxconverter-common", "tf2onnx"]
tf-cpu = ["tensorflow-cpu (>=2.3)", "onnxconverter-common", "tf2onnx"]
tf-speech = ["librosa", "pyctcdecode (>=0.3.0)", "phonemizer"]
......@@ -613,7 +594,7 @@ timm = ["timm"]
tokenizers = ["tokenizers (>=0.11.1,!=0.11.3,<0.13)"]
torch = ["torch (>=1.0)"]
torch-speech = ["torchaudio", "librosa", "pyctcdecode (>=0.3.0)", "phonemizer"]
torchhub = ["filelock", "huggingface-hub (>=0.1.0,<1.0)", "importlib-metadata", "numpy (>=1.17)", "packaging (>=20.0)", "protobuf", "regex (!=2019.12.17)", "requests", "sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "torch (>=1.0)", "tokenizers (>=0.11.1,!=0.11.3,<0.13)", "tqdm (>=4.27)"]
torchhub = ["filelock", "huggingface-hub (>=0.1.0,<1.0)", "importlib-metadata", "numpy (>=1.17)", "packaging (>=20.0)", "protobuf", "regex (!=2019.12.17)", "requests", "sentencepiece (>=0.1.91,!=0.1.92)", "torch (>=1.0)", "tokenizers (>=0.11.1,!=0.11.3,<0.13)", "tqdm (>=4.27)"]
vision = ["pillow"]
[[package]]
......@@ -672,8 +653,8 @@ python-versions = "*"
[metadata]
lock-version = "1.1"
python-versions = "^3.8"
content-hash = "d3639d0f322d79260a5fe40ea43817a72bcd16885b66b7a1b1bb3ec355d37264"
python-versions = "3.8"
content-hash = "f559d5695f1365c162f02c2146df48de52ad2d38e1b4a26476c7a662dc065365"
[metadata.files]
atomicwrites = [
......@@ -715,8 +696,8 @@ charset-normalizer = [
{file = "charset_normalizer-2.0.12-py3-none-any.whl", hash = "sha256:6881edbebdb17b39b4eaaa821b438bf6eddffb4468cf344f09f89def34a8b1df"},
]
click = [
{file = "click-8.1.2-py3-none-any.whl", hash = "sha256:24e1a4a9ec5bf6299411369b208c1df2188d9eb8d916302fe6bf03faed227f1e"},
{file = "click-8.1.2.tar.gz", hash = "sha256:479707fe14d9ec9a0757618b7a100a0ae4c4e236fac5b7f80ca68028141a1a72"},
{file = "click-8.1.3-py3-none-any.whl", hash = "sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48"},
{file = "click-8.1.3.tar.gz", hash = "sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e"},
]
colorama = [
{file = "colorama-0.4.4-py2.py3-none-any.whl", hash = "sha256:9f47eda37229f68eee03b24b9748937c7dc3868f906e8ba69fbcbdd3bc5dc3e2"},
......@@ -741,15 +722,12 @@ cymem = [
{file = "cymem-2.0.6.tar.gz", hash = "sha256:169725b5816959d34de2545b33fee6a8021a6e08818794a426c5a4f981f17e5e"},
]
filelock = [
{file = "filelock-3.6.0-py3-none-any.whl", hash = "sha256:f8314284bfffbdcfa0ff3d7992b023d4c628ced6feb957351d4c48d059f56bc0"},
{file = "filelock-3.6.0.tar.gz", hash = "sha256:9cd540a9352e432c7246a48fe4e8712b10acb1df2ad1f30e8c070b82ae1fed85"},
]
functools = [
{file = "functools-0.5.tar.gz", hash = "sha256:596ed8999dee419c0749a41bfdd82e4697e80ea27ee01c716003ef55be9a54c5"},
{file = "filelock-3.7.0-py3-none-any.whl", hash = "sha256:c7b5fdb219b398a5b28c8e4c1893ef5f98ece6a38c6ab2c22e26ec161556fed6"},
{file = "filelock-3.7.0.tar.gz", hash = "sha256:b795f1b42a61bbf8ec7113c341dad679d772567b936fbd1bf43c9a238e673e20"},
]
huggingface-hub = [
{file = "huggingface_hub-0.5.1-py3-none-any.whl", hash = "sha256:b9fd1f567a3fb16e73acc613e78d075d1926d4b0c5c56ba08c4f125707b50c70"},
{file = "huggingface_hub-0.5.1.tar.gz", hash = "sha256:d90d657dca0d6a577f640ff684a58da8e5c76258e485100e885a0e7307e2eb12"},
{file = "huggingface_hub-0.6.0-py3-none-any.whl", hash = "sha256:585d72adade562a1f7038acf39eb7677b7649bdc0ce082b70f99e01164d9d8b5"},
{file = "huggingface_hub-0.6.0.tar.gz", hash = "sha256:f5109065222185d129933d44159e483a9e3378c577127d0281e4c921dfadbd23"},
]
idna = [
{file = "idna-3.3-py3-none-any.whl", hash = "sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff"},
......@@ -810,8 +788,8 @@ markupsafe = [
{file = "MarkupSafe-2.1.1.tar.gz", hash = "sha256:7f91197cc9e48f989d12e4e6fbc46495c446636dfc81b9ccf50bb0ec74b91d4b"},
]
more-itertools = [
{file = "more-itertools-8.12.0.tar.gz", hash = "sha256:7dc6ad46f05f545f900dd59e8dfb4e84a4827b97b3cfecb175ea0c7d247f6064"},
{file = "more_itertools-8.12.0-py3-none-any.whl", hash = "sha256:43e6dd9942dffd72661a2c4ef383ad7da1e6a3e968a927ad7a6083ab410a688b"},
{file = "more-itertools-8.13.0.tar.gz", hash = "sha256:a42901a0a5b169d925f6f217cd5a190e32ef54360905b9c39ee7db5313bfec0f"},
{file = "more_itertools-8.13.0-py3-none-any.whl", hash = "sha256:c5122bffc5f104d37c1626b8615b511f3427aa5389b94d61e5ef8236bfbc3ddb"},
]
murmurhash = [
{file = "murmurhash-1.0.7-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:966d2efec6e01aa32c5774c44906724efca00da3507f06faa11acafb47ea1230"},
......@@ -916,8 +894,8 @@ pydantic = [
{file = "pydantic-1.8.2.tar.gz", hash = "sha256:26464e57ccaafe72b7ad156fdaa4e9b9ef051f69e175dbbb463283000c05ab7b"},
]
pyparsing = [
{file = "pyparsing-3.0.7-py3-none-any.whl", hash = "sha256:a6c06a88f252e6c322f65faf8f418b16213b51bdfaece0524c1c1bc30c63c484"},
{file = "pyparsing-3.0.7.tar.gz", hash = "sha256:18ee9022775d270c55187733956460083db60b37d0d0fb357445f3094eed3eea"},
{file = "pyparsing-3.0.9-py3-none-any.whl", hash = "sha256:5026bae9a10eeaefb61dab2f09052b9f4307d44aee4eda64b309723d8d206bbc"},
{file = "pyparsing-3.0.9.tar.gz", hash = "sha256:2b020ecf7d21b687f219b71ecad3631f644a47f01403fa1d1036b0c6416d70fb"},
]
pytest = [
{file = "pytest-5.4.3-py3-none-any.whl", hash = "sha256:5c0db86b698e8f170ba4582a492248919255fcd4c79b1ee64ace34301fb589a1"},
......@@ -1038,13 +1016,6 @@ requests = [
{file = "requests-2.27.1-py2.py3-none-any.whl", hash = "sha256:f22fa1e554c9ddfd16e6e41ac79759e17be9e492b3587efa038054674760e72d"},
{file = "requests-2.27.1.tar.gz", hash = "sha256:68d7c56fd5a8999887728ef304a6d12edc7be74f1cfa47714fc8b414525c9a61"},
]
sacremoses = [
{file = "sacremoses-0.0.53.tar.gz", hash = "sha256:43715868766c643b35de4b8046cce236bfe59a7fa88b25eaf6ddf02bacf53a7a"},
]
six = [
{file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"},
{file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"},
]
smart-open = [
{file = "smart_open-5.2.1-py3-none-any.whl", hash = "sha256:71d14489da58b60ce12fc3ecb823facc59a8b23cd1b58edb97175640350d3a62"},
{file = "smart_open-5.2.1.tar.gz", hash = "sha256:75abf758717a92a8f53aa96953f0c245c8cedf8e1e4184903db3659b419d4c17"},
......@@ -1146,13 +1117,34 @@ tokenizers = [
{file = "tokenizers-0.12.1-cp39-cp39-win_amd64.whl", hash = "sha256:2158baf80cbc09259bfd6e0e0fc4597b611e7a72ad5443dad63918a90f1dd304"},
{file = "tokenizers-0.12.1.tar.gz", hash = "sha256:070746f86efa6c873db341e55cf17bb5e7bdd5450330ca8eca542f5c3dab2c66"},
]
torch = [
{file = "torch-1.11.0-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:62052b50fffc29ca7afc0c04ef8206b6f1ca9d10629cb543077e12967e8d0398"},
{file = "torch-1.11.0-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:866bfba29ac98dec35d893d8e17eaec149d0ac7a53be7baae5c98069897db667"},
{file = "torch-1.11.0-cp310-cp310-win_amd64.whl", hash = "sha256:951640fb8db308a59d9b510e7d1ad910aff92913323bbe4bc75435347ddd346d"},
{file = "torch-1.11.0-cp310-none-macosx_10_9_x86_64.whl", hash = "sha256:5d77b5ece78fdafa5c7f42995ff9474399d22571cd6b2de21a5d666306a2ff8c"},
{file = "torch-1.11.0-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:b5a38682769b544c875ecc34bcb81fbad5c922139b61319aacffcfd8a32f528c"},
{file = "torch-1.11.0-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:f82d77695a60626f2b7382d85bc566de8a6b3e50d32080755abc040db802e419"},
{file = "torch-1.11.0-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:b96654d42566080a134e784705f33f8536b3b95b5dcde357ed7879b1692a5f78"},
{file = "torch-1.11.0-cp37-cp37m-win_amd64.whl", hash = "sha256:8ee7c2e8d7f7020d5bfbc1bb91b9591044c26bbd0cee5e4f694cfd7ed8649260"},
{file = "torch-1.11.0-cp37-none-macosx_10_9_x86_64.whl", hash = "sha256:6860b1d1bf0bb0b67a6bd47f85a0e4c825b518eea13b5d6101999dbbcbd5bc0c"},
{file = "torch-1.11.0-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:4322aa29f50da7f404db06cdf30896ea67b09f673af4a985afc7162bc897864d"},
{file = "torch-1.11.0-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:e4d2e0ddd652f30e94cff750220324ec45705d4ecc69658f773b3cb1c7a28dd0"},
{file = "torch-1.11.0-cp38-cp38-win_amd64.whl", hash = "sha256:34ce5ea4d8d85da32cdbadb50d4585106901e9f8a3527991daa70c13a09de1f7"},
{file = "torch-1.11.0-cp38-none-macosx_10_9_x86_64.whl", hash = "sha256:0ccc85cd06227a3edf809e2c795fd5762c3d4e8a38b5c9f744c6e7cf841361bb"},
{file = "torch-1.11.0-cp38-none-macosx_11_0_arm64.whl", hash = "sha256:c1554e49d74f1b2c3e7202d77056ba2dd7465437585bac64062b580f714a44e9"},
{file = "torch-1.11.0-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:58c7814502b1c129a650d7092033bbb0bbd64faf1a7941631aaa1aeaddc37570"},
{file = "torch-1.11.0-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:831cf588f01dda9409e75576741d2823453990dee2983d670f2584b37a01adf7"},
{file = "torch-1.11.0-cp39-cp39-win_amd64.whl", hash = "sha256:44a1d02fd20f827f0f36dc26fdcfc45e793806a6ad52769a22260655a77a4369"},
{file = "torch-1.11.0-cp39-none-macosx_10_9_x86_64.whl", hash = "sha256:50fd9bf85c578c871c28f1cb0ace9dfc6024401c7f399b174fb0f370899f4454"},
{file = "torch-1.11.0-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:0e48af66ad755f0f9c5f2664028a414f57c49d6adc37e77e06fe0004da4edb61"},
]
tqdm = [
{file = "tqdm-4.64.0-py2.py3-none-any.whl", hash = "sha256:74a2cdefe14d11442cedf3ba4e21a3b84ff9a2dbdc6cfae2c34addb2a14a5ea6"},
{file = "tqdm-4.64.0.tar.gz", hash = "sha256:40be55d30e200777a307a7585aee69e4eabb46b4ec6a4b4a5f2d9f11e7d5408d"},
]
transformers = [
{file = "transformers-4.18.0-py3-none-any.whl", hash = "sha256:6ae54fc29bd4bba5b0230d429cb55b8b3eb5feb9e3c9913c61203999f1f0c2c9"},
{file = "transformers-4.18.0.tar.gz", hash = "sha256:16f7751c44f31d8f9a3811bccd80f1995e1cb0ffd9b7de60ef6ede2ab90a6fd4"},
{file = "transformers-4.19.1-py3-none-any.whl", hash = "sha256:16d3dd257d459c2598e2548a9e6875c10b7db5e44494d93b3c0a5c60afad667f"},
{file = "transformers-4.19.1.tar.gz", hash = "sha256:6fb30ee534a25b6b3fc7064c280b7f44abf8c9bd1fb358860ebe4fd392bf15f5"},
]
typer = [
{file = "typer-0.4.1-py3-none-any.whl", hash = "sha256:e8467f0ebac0c81366c2168d6ad9f888efdfb6d4e1d3d5b4a004f46fa444b5c3"},
......
......@@ -5,14 +5,16 @@ description = "A text analysis library for Python"
authors = ["Jaime Collado <jcollado@ujaen.es>", "Estrella Vallecillo <mevr0003@red.ujaen.es>"]
[tool.poetry.dependencies]
python = "^3.8"
python = "3.8"
nltk = "^3.7"
spacy = "^3.3.0"
transformers = "^4.18.0"
transformers = "^4.19.0"
torch = {version = "^1.11.0", python = "^3.7", platform = "linux"}
[tool.poetry.dev-dependencies]
pytest = "^5.2"
[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
......@@ -3,29 +3,13 @@ import spacy
import spacy.cli
from typing import Optional
from textflow.Sequence import Sequence
#from transformers import pipeline
from abc import ABC, abstractmethod
class Analyzer:
class Analyzer(ABC):
def __init__(self, function, isMetadata: Optional[bool] = False,lang : Optional[str] = "es"):
"""Creates an analyzer from an input object.
Args:
function: the function of the analyzer like count word, files...
isMetadata: boolean, if the result of the analyzer is stored in metadata (True) or in children(False)
"""
if lang == "es":
spacy.cli.download("es_core_news_sm")
self.nlp = spacy.load("es_core_news_sm")
elif lang == "en":
spacy.cli.download("en_core_web_sm")
self.nlp = spacy.load("en_core_web_sm")
self.lang = lang
self.function = function
self.isMetadata = isMetadata
def analyze(self, sequence, tag, levelOfAnalyzer, levelOfResult:Optional[str] = "", analyzeMetadata: Optional[bool] = False): #TODO
@abstractmethod
def analyze(self, functionAnalyzer,sequence, tag, levelOfAnalyzer, levelOfResult:Optional[str] = "", analyzeMetadata: Optional[bool] = False): #TODO
"""Analyze a sequence
Args:
......@@ -40,7 +24,7 @@ class Analyzer:
"""
if levelOfResult == "":
if analyzeMetadata:
analyzeResult = sequence.filterMetadata(levelOfAnalyzer, self.function)
analyzeResult = sequence.filterMetadata(levelOfAnalyzer, functionAnalyzer)
resultOfAnalisys= []
for i in analyzeResult:
resultOfAnalisys.append(i)
......@@ -57,9 +41,7 @@ class Analyzer:
if r == ruta[-1]:
for seq in child[r]:
if analyzeMetadata:
analyzeResult = seq.filterMetadata(levelOfAnalyzer, self.function)
'''for i in analyzeResult:
resultOfAnalisys = i'''
analyzeResult = seq.filterMetadata(levelOfAnalyzer, functionAnalyzer)
resultOfAnalisys= []
for i in analyzeResult:
......@@ -70,7 +52,7 @@ class Analyzer:
seq.metadata[tag] = resultOfAnalisys
else:
analyzeResult = seq.filter(levelOfAnalyzer, self.function)
analyzeResult = seq.filter(levelOfAnalyzer, functionAnalyzer)
for i in analyzeResult:
resultOfAnalisys = i
if isinstance(resultOfAnalisys[0], Sequence):
......@@ -84,146 +66,6 @@ class Analyzer:
raise ValueError(f"Sequence level '{r}' not found in {child}")
#The sequence must always have a text attribute for this to work
#Count the number of words, number of unique words, number of characters and the average number of characters
def volumetry(self,sequence,levelOfAnalyze): #TODO: review
children = [sequence.children]
ruta = levelOfAnalyze.split("/")
for r in ruta: #For each level of the path
for child in children: #Look through every available sequence
if r in child: #If r is inside the current sequence
if r == ruta[-1]:
for seq in child[r]:
if "text" not in seq.metadata:
raise ValueError(f"Level text not found in {seq.metadata.keys()}")
else:
text = seq.metadata["text"].split(" ")
volumetry= {
"words" : len(text),
"uniqueWords" : len(set(text)),
"chars" : len(seq.metadata["text"]),
"avgWordsLen" : round(volumetry["chars"] / volumetry["words"])
}
seq.metadata["volumetry"] = volumetry
else:
children = [c.children for c in child[r]]
else:
raise ValueError(f"Sequence level '{r}' not found in {child}")
def lemmas(self, sequence, levelOfAnalyze): #TODO: review
children = [sequence.children]
ruta = levelOfAnalyze.split("/")
for r in ruta: #For each level of the path
for child in children: #Look through every available sequence
if r in child: #If r is inside the current sequence
if r == ruta[-1]:
for seq in child[r]:
if "text" not in seq.metadata:
raise ValueError(f"Level text not found in {seq.metadata.keys()}")
else:
sequenceLemmas = []
setLemmas = set()
lemma ={}
sumaLenLemmas=0
text = seq.metadata["text"]
doc= self.nlp(text)
for token in doc:
if token.pos_ not in ["PUNCT", "SPACE", "SYM"]:
sumaLenLemmas += len(token.lemma_)
setLemmas.add(token.lemma_)
s = Sequence("token",token.lemma_)
sequenceLemmas.append(s)
lemma["uniqueLemmas"] = len(setLemmas)
lemma["avgLemmasLen"] = round(sumaLenLemmas/len(sequenceLemmas))
seq.metadata["lemmas"] = lemma
seq.children["lemmas"] = sequenceLemmas
else:
children = [c.children for c in child[r]]
else:
raise ValueError(f"Sequence level '{r}' not found in {child}")
\ No newline at end of file
#A "token" label must exist in children; if it does not, it will be created
def pos (self, sequence, levelOfAnalyze): #TODO: review
children = [sequence.children]
ruta = levelOfAnalyze.split("/")
for r in ruta: #For each level of the path
for child in children: #Look through every available sequence
if r in child: #If r is inside the current sequence
if r == ruta[-1]:
for seq in child[r]:
if "text" not in seq.metadata:
raise ValueError("The sequence of the level {levelOfAnalyze} don't have atribute text")
else:
doc = self.nlp(seq.metadata["text"])
if "tokens" not in seq.children:
#Create one
pos=[]
for token in doc:
s = Sequence("token",token.text)
s.metadata["pos"] = token.pos_
pos.append(s)
seq.children["tokens"] = pos
else:
pos=[]
for token in doc:
pos.append(token.pos_)
for seqToken in seq.children["tokens"]:
seqToken.metadata["pos"] = pos.pop(0)
else:
children = [c.children for c in child[r]]
else:
raise ValueError(f"Sequence level '{r}' not found in {child}")
'''
def polaridad(self, sequence, levelOfAnalyze):
#https://huggingface.co/finiteautomata/beto-sentiment-analysis
if self.lang == "es":
polarityClassifier = pipeline("text-classification",model='finiteautomata/beto-sentiment-analysis', return_all_scores=True)
elif self.lang == "en":
polarityClassifier = pipeline("text-classification",model='finiteautomata/bertweet-base-sentiment-analysis', return_all_scores=True)
children = [sequence.children]
ruta = levelOfAnalyze.split("/")
for r in ruta: #For each level of the path
for child in children: #Look through every available sequence
if r in child: #If r is inside the current sequence
if r == ruta[-1]:
for seq in child[r]:
if "text" not in seq.metadata:
raise ValueError(f"Level text not found in {seq.metadata.keys()}")
else:
prediction = polarityClassifier(seq.metadata["text"])
seq.metadata["polarity"] = prediction
else:
children = [c.children for c in child[r]]
else:
raise ValueError(f"Sequence level '{r}' not found in {child}")
pass
def emotions(self, sequence, levelOfAnalyze):
if self.lang == "es":
emotionsClassifier = pipeline("text-classification",model='pysentimiento/robertuito-emotion-analysis', return_all_scores=True)
elif self.lang == "en":
emotionsClassifier = pipeline("text-classification",model='bhadresh-savani/distilbert-base-uncased-emotion', return_all_scores=True)
children = [sequence.children]
ruta = levelOfAnalyze.split("/")
for r in ruta: #For each level of the path
for child in children: #Look through every available sequence
if r in child: #If r is inside the current sequence
if r == ruta[-1]:
for seq in child[r]:
if "text" not in seq.metadata:
raise ValueError(f"Level text not found in {seq.metadata.keys()}")
else:
prediction = emotionsClassifier(seq.metadata["text"])
seq.metadata["emotions"] = prediction
else:
children = [c.children for c in child[r]]
else:
raise ValueError(f"Sequence level '{r}' not found in {child}")'''
\ No newline at end of file
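The net effect of the changes above is that Analyzer becomes an abstract template method: the walk over children and metadata stays in the base class, and each concrete analyzer only supplies the callable that turns a list of texts into a list of results. A minimal hypothetical subclass is sketched below (CharCountAnalyzer and charCount are illustrative names, not part of this commit):

from typing import Optional

from textflow.Analyzer import Analyzer


class CharCountAnalyzer(Analyzer):
    """Hypothetical example: stores the character count of each analyzed text."""

    def analyze(self, sequence, tag, levelOfAnalyzer, levelOfResult: Optional[str] = ""):
        # Delegate the walk over the sequence tree to the base class; the final True
        # means the analyzer reads the text from metadata rather than from children.
        super().analyze(self.charCount, sequence, tag, levelOfAnalyzer, levelOfResult, True)

    def charCount(self, arrayText):
        # Receives the texts selected by filterMetadata and returns one result per text.
        return [len(text) for text in arrayText]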
......@@ -7,11 +7,13 @@ import re
import numpy as np
import math
from functools import reduce
from textflow.Analyzer import Analyzer
creaPath = os.path.join(os.path.dirname(__file__), 'Crea-5000.txt')
class ComplexityAnalyzer:
def __init__(self, lang = "es"):
class ComplexityAnalyzer(Analyzer):
def __init__(self, rutaArchivoCrea = creaPath,lang = "es"):
"""Creates an analyzer from an input object.
Args:
......@@ -22,11 +24,7 @@ class ComplexityAnalyzer:
spacy.cli.download("es_core_news_sm")
self.nlp = spacy.load("es_core_news_sm")
#Load the CREA frequency list:
self.dicFreqWords=self.read(creaPath)
self.function = self.complexity
'''elif lang == "en":
spacy.cli.download("en_core_web_sm")
self.nlp = spacy.load("en_core_web_sm")'''
self.dicFreqWords=self.read(rutaArchivoCrea)
#This analyzer can only analyze text strings, so it only makes sense for it to use the text attribute of the metadata
def analyze(self, sequence, tag, levelOfAnalyzer, levelOfResult:Optional[str] = ""): #TODO
......@@ -42,7 +40,8 @@ class ComplexityAnalyzer:
Raises:
ValueError if the levelOfResult is incorrect
"""
if levelOfResult == "":
super().analyze(self.complexity,sequence, tag, levelOfAnalyzer, levelOfResult, True)
'''if levelOfResult == "":
analyzeResult = sequence.filterMetadata(levelOfAnalyzer,self.function)#TODO
resultOfAnalisys= []
for i in analyzeResult:
......@@ -64,7 +63,7 @@ class ComplexityAnalyzer:
else:
children = [c.children for c in child[r]]
else:
raise ValueError(f"Sequence level '{r}' not found in {child}")
raise ValueError(f"Sequence level '{r}' not found in {child}") '''
def read(self,fichero):
......
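With the new rutaArchivoCrea parameter the CREA word-frequency list is no longer hard-wired to the bundled Crea-5000.txt. A hedged sketch of pointing the analyzer at another dump (the path is hypothetical and the file must follow the format expected by read()):

from textflow.ComplexityAnalyzer import ComplexityAnalyzer  # assumed module path

analyzer = ComplexityAnalyzer(rutaArchivoCrea="/data/CREA_total.txt")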
import os
import spacy
import spacy.cli
from typing import Optional
from textflow.Analyzer import Analyzer
from transformers import pipeline
import torch
class EmotionAnalyzer(Analyzer):
def __init__(self, task = "text-classification",modelEmotions = 'pysentimiento/robertuito-emotion-analysis', allScores = True):
"""Creates an analyzer from an input object.
Args:
function: the function of the analyzer like count word, files...
isMetadata: boolean, if the result of the analyzer is stored in metadata (True) or in children(False)
"""
self.emotionsClassifier = pipeline(task,model=modelEmotions, return_all_scores=allScores)
#This analyzer can only analyze text strings, so it only makes sense for it to use the text attribute of the metadata
def analyze(self, sequence, tag, levelOfAnalyzer, levelOfResult:Optional[str] = ""): #TODO
"""Analyze a sequence
Args:
sequence: the Sequence we want to analyze
tag: the label to store the analysis result
levelOfAnalyzer: the path of the sequence level to analyze inside the result (the subsequence to analyze within the sequence where we want to store the result)
levelOfResult: the path of the sequence level to store the result (we may want to analyze the tokens but store the result at sentence level)
analyzeMetadata: boolean, if the result of the analyzer is applied to the metadata (True) or to the children (False)
Raises:
ValueError if the levelOfResult is incorrect
"""
super().analyze(self.emotions,sequence, tag, levelOfAnalyzer, levelOfResult, True)
def emotions(self, arrayText):
arrayResults =[]
for text in arrayText:
prediction = self.emotionsClassifier(text)
#arrayResults.append(prediction[0][0])
arrayResults.append(prediction)
return arrayResults
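Because the pipeline task and model are constructor arguments, swapping the Spanish default for the English model that the old commented-out emotions() code used is a one-line change; PolarityAnalyzer below follows the same pattern through modelPolarity. A sketch, assuming the module lives under the textflow package:

from textflow.EmotionAnalyzer import EmotionAnalyzer  # assumed module path

# English emotions instead of the default pysentimiento/robertuito-emotion-analysis.
analyzer = EmotionAnalyzer(
    modelEmotions="bhadresh-savani/distilbert-base-uncased-emotion",
    allScores=True,
)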
import spacy
import spacy.cli
from typing import Optional
from textflow.Analyzer import Analyzer
spacy.cli.download("es_core_news_sm")
class LemmaAnalyzer(Analyzer):
def __init__(self, nlp = spacy.load("es_core_news_sm"), posNoContent = ["PUNCT", "SPACE", "SYM"]):
"""Creates an analyzer from an input object.
Args:
function: the function of the analyzer like count word, files...
isMetadata: boolean, if the result of the analyzer is stored in metadata (True) or in children(False)
"""
self.nlp = nlp
self.posNoContent = posNoContent
#This analyzer can only analyze text strings, so it only makes sense for it to use the text attribute of the metadata
def analyze(self, sequence, tag, levelOfAnalyzer, levelOfResult:Optional[str] = ""): #TODO
"""Analyze a sequence
Args:
sequence: the Sequence we want to analyze
tag: the label to store the analysis result
levelOfAnalyzer: the path of the sequence level to analyze inside the result (the subsequence to analyze within the sequence where we want to store the result)
levelOfResult: the path of the sequence level to store the result (we may want to analyze the tokens but store the result at sentence level)
analyzeMetadata: boolean, if the result of the analyzer is applied to the metadata (True) or to the children (False)
Raises:
ValueError if the levelOfResult is incorrect
"""
super().analyze(self.lemmas,sequence, tag, levelOfAnalyzer, levelOfResult, True)
def lemmas(self, arrayText):
arrayResult = []
for text in arrayText:
sequenceLemmas = []
setLemmas = set()
sumaLenLemmas=0
doc= self.nlp(text)
for token in doc:
if token.pos_ not in self.posNoContent:
sumaLenLemmas += len(token.lemma_)
setLemmas.add(token.lemma_)
sequenceLemmas.append(token.lemma_)
lemma={
"srclemmas" : sequenceLemmas,
"uniqueLemmas" : len(setLemmas),
"avgLemmas" : round(sumaLenLemmas/len(sequenceLemmas))
}
arrayResult.append(lemma)
return arrayResult
import os
import spacy
import spacy.cli
from typing import Optional
from textflow.Analyzer import Analyzer
spacy.cli.download("es_core_news_sm")
class POSAnalyzer(Analyzer):
def __init__(self, nlp = spacy.load("es_core_news_sm")):
"""Creates an analyzer from an input object.
Args:
function: the function of the analyzer like count word, files...
isMetadata: boolean, if the result of the analyzer is stored in metadata (True) or in children(False)
"""
self.nlp = nlp
#This analyzer can only analyze text strings, so it only makes sense for it to use the text attribute of the metadata
def analyze(self, sequence, tag, levelOfAnalyzer, levelOfResult:Optional[str] = ""): #TODO
"""Analyze a sequence
Args:
sequence: the Sequence we want to analyze
tag: the label to store the analysis result
levelOfAnalyzer: the path of the sequence level to analyze inside the result (the subsequence to analyze within the sequence where we want to store the result)
levelOfResult: the path of the sequence level to store the result (we may want to analyze the tokens but store the result at sentence level)
analyzeMetadata: boolean, if the result of the analyzer is applied to the metadata (True) or to the children (False)
Raises:
ValueError if the levelOfResult is incorrect
"""
super().analyze(self.pos,sequence, tag, levelOfAnalyzer, levelOfResult, True)
def pos(self,arrayText):
arrayResults = []
for text in arrayText:
srcPOS = []
dicFreqPOS = {}
doc = self.nlp(text)
for token in doc:
srcPOS.append(token.pos_)
if token.pos_ in dicFreqPOS:
dicFreqPOS[token.pos_] += 1
else:
dicFreqPOS[token.pos_] = 1
pos = {
"srcPOS": srcPOS,
"FreqPOS": dicFreqPOS
}
arrayResults.append(pos)
return arrayResults
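LemmaAnalyzer and POSAnalyzer take the spaCy pipeline by injection, so the Spanish default can be replaced without touching the classes; note that the spacy.load(...) default argument and the module-level spacy.cli.download call both run at import time, so importing these modules already pulls es_core_news_sm. A sketch of reusing both analyzers for English (assumes en_core_web_sm is installed, e.g. via spacy.cli.download, and that the modules live under textflow):

import spacy

from textflow.LemmaAnalyzer import LemmaAnalyzer  # assumed module paths
from textflow.POSAnalyzer import POSAnalyzer

nlp_en = spacy.load("en_core_web_sm")  # shared pipeline for both analyzers
lemmas = LemmaAnalyzer(nlp=nlp_en)
pos = POSAnalyzer(nlp=nlp_en)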
import os
import spacy
import spacy.cli
from typing import Optional
from textflow.Analyzer import Analyzer
from transformers import pipeline
import torch
class PolarityAnalyzer(Analyzer):
def __init__(self, task = "text-classification",modelPolarity = 'finiteautomata/beto-sentiment-analysis', allScores = True):
"""Creates an analyzer from an input object.
Args:
function: the function of the analyzer like count word, files...
isMetadata: boolean, if the result of the analyzer is stored in metadata (True) or in children(False)
"""
self.polarityClassifier = pipeline(task,model= modelPolarity, return_all_scores=allScores)
#This analyzer can only analyze text strings, so it only makes sense for it to use the text attribute of the metadata
def analyze(self, sequence, tag, levelOfAnalyzer, levelOfResult:Optional[str] = ""): #TODO
"""Analyze a sequence
Args:
sequence: the Sequence we want to analyze
tag: the label to store the analysis result
levelOfAnalyzer: the path of the sequence level to analyze inside the result (the subsequence to analyze within the sequence where we want to store the result)
levelOfResult: the path of the sequence level to store the result (we may want to analyze the tokens but store the result at sentence level)
analyzeMetadata: boolean, if the result of the analyzer is applied to the metadata (True) or to the children (False)
Raises:
ValueError if the levelOfResult is incorrect
"""
super().analyze(self.polarity,sequence, tag, levelOfAnalyzer, levelOfResult, True)
def polarity(self, arrayText):
arrayResults =[]
for text in arrayText:
prediction = self.polarityClassifier(text)
#arrayResults.append(prediction[0][0])
arrayResults.append(prediction)
return arrayResults
......@@ -4,6 +4,7 @@ from nltk.tokenize import TreebankWordTokenizer
from nltk.tokenize import WhitespaceTokenizer
from nltk.tokenize import SpaceTokenizer
from nltk.tokenize import WordPunctTokenizer
from nltk.tokenize import RegexpTokenizer
class SequenceIterator:
......
import string
from typing import Optional
import spacy
import spacy.cli
#import spacy
#import spacy.cli
from nltk.text import Text
from nltk.tokenize import WhitespaceTokenizer
import math
from textflow.Analyzer import Analyzer
class StylometryyAnalyzer: #TODO
def __init__(self, lang = "es"):
if lang == "es":
spacy.cli.download("es_core_news_sm")
self.nlp = spacy.load("es_core_news_sm")
self.function = self.stylometry
pass
class StylometryAnalyzer(Analyzer): #TODO
def __init__(self,stopwords, puntuation = string.punctuation,tokenizer = WhitespaceTokenizer()):
self.stopwords = stopwords
self.puntuation = puntuation
self.tokenizer = tokenizer
#This analyzer can only analyze text strings, so it only makes sense for it to use the text attribute of the metadata
def analyze(self, sequence, tag, levelOfAnalyzer, levelOfResult:Optional[str] = ""): #TODO
"""Analyze a sequence
Args:
sequence: the Sequence we want to analyze
tag: the label to store the analysis result
levelOfAnalyzer: the path of the sequence level to analyze inside the result (the subsequence to analyze within the sequence where we want to store the result)
levelOfResult: the path of the sequence level to store the result (we may want to analyze the tokens but store the result at sentence level)
analyzeMetadata: boolean, if the result of the analyzer is applied to the metadata (True) or to the children (False)
Raises:
ValueError if the levelOfResult is incorrect
"""
if levelOfResult == "":
analyzeResult = sequence.filterMetadata(levelOfAnalyzer,self.function)#TODO
resultOfAnalisys= []
for i in analyzeResult:
resultOfAnalisys.append(i)
sequence.metadata[tag] = resultOfAnalisys
else:
children = [sequence.children]
ruta = levelOfResult.split("/")
for r in ruta: #For each level of the path
for child in children: #Look through every available sequence
if r in child: #If r is inside the current sequence
if r == ruta[-1]:
for seq in child[r]:
analyzeResult = seq.filterMetadata(levelOfAnalyzer,self.function)
resultOfAnalisys= []
for i in analyzeResult:
resultOfAnalisys.append(i)
seq.metadata[tag] = resultOfAnalisys
def analyze(self, sequence, tag, levelOfAnalyzer, levelOfResult:Optional[str]= ""):
super().analyze(self.stylometry,sequence, tag, levelOfAnalyzer, levelOfResult, True)
def stylometry(self, arrayText):
resultsList = []
for t in arrayText:
#doc = self.nlp(text)
tokens = self.tokenizer.tokenize(t)
text = [token.lower() for token in tokens]
self.freqWords(text,self.stopwords,self.puntuation)
self.funcionesTTR(text)
result={
"uniqueWords": len(self.uniqueWords),
"TTR": self.TTR,
"RTTR": self.RTTR,
"Herdan": self.herdan,
"Mass": self.mass,
"Somers": self.somers,
"Dugast": self.dugast,
"Honore": self.honore,
"FreqStopWords": self.freqStopWords,
"FreqPuntuationMarks": self.freqPuntuationMarks,
"FreqWords": self.freqWord
}
resultsList.append(result)
return resultsList
def funcionesTTR(self, text):
self.uniqueWords = [token[0] for token in self.freqWord]
self.numWordFreqOne = len( [token[0] for token in self.freqWord if token[1] == 1 ])
self.TTR = len(self.uniqueWords) / len(text)
self.RTTR = len(self.uniqueWords) / math.sqrt(len(text))
self.herdan = math.log(len(self.uniqueWords),10) / math.log(len(text),10)
self.mass = (math.log(len(text),10)- math.log(len(self.uniqueWords),10)) / pow(math.log(len(self.uniqueWords),10),2)
self.somers = math.log(math.log(len(self.uniqueWords),10),10) / math.log(math.log(len(text),10),10)
if math.log(len(text),10)- math.log(len(self.uniqueWords),10) == 0:
self.dugast = pow(math.log(len(text),10),2)
else:
self.dugast = pow(math.log(len(text),10),2) / (math.log(len(text),10)- math.log(len(self.uniqueWords),10))
if 1-(self.numWordFreqOne/len(self.uniqueWords)) == 0:
self.honore = 100*(math.log(len(text),10))
else:
raise ValueError(f"Sequence level '{r}' not found in {child}")
self.honore = 100*(math.log(len(text),10)/(1-(self.numWordFreqOne/len(self.uniqueWords))))
def stylometry(self):
pass
def freqWords(self,tokens, stopWords, puntuationMarks):
freqStopWords = {}
freqPuntuationMarks = {}
freqWord ={}
for token in tokens:
if token in stopWords:
if token in freqStopWords:
freqStopWords[token] += 1
else:
freqStopWords[token] = 1
elif token in puntuationMarks:
if token in freqPuntuationMarks:
freqPuntuationMarks[token] += 1
else:
freqPuntuationMarks[token] = 1
else:
if token in freqWord:
freqWord[token] += 1
else:
freqWord[token] = 1
self.freqWord = sorted(freqWord.items(), reverse = True)
self.freqPuntuationMarks = sorted(freqPuntuationMarks.items(), reverse = True)
self.freqStopWords = sorted(freqStopWords.items(), reverse = True)
\ No newline at end of file
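For reference, the type-token measures computed in funcionesTTR above correspond to the following lexical-diversity indices, with V the number of distinct tokens, N the total number of tokens, V_1 the number of tokens that occur exactly once, and logarithms taken base 10 as in the code (note the implementation divides Maas' index by (\log V)^2, whereas the usual definition divides by (\log N)^2):

TTR = V / N
RTTR = V / \sqrt{N}
Herdan = \log V / \log N
Mass = (\log N - \log V) / (\log V)^2
Somers = \log \log V / \log \log N
Dugast = (\log N)^2 / (\log N - \log V)
Honore = 100 \cdot \log N / (1 - V_1 / V)

The two if/else guards handle the degenerate cases: when every token is unique (\log N = \log V) Dugast falls back to (\log N)^2, and when every distinct word occurs exactly once (V_1 = V) Honore falls back to 100 \cdot \log N.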
from typing import Optional
from textflow.Sequence import Sequence
from nltk.tokenize import WhitespaceTokenizer
from textflow.Analyzer import Analyzer
class VolumetryAnalyzer(Analyzer):
def __init__(self, tokenizer= WhitespaceTokenizer()):
"""Creates an analyzer from an input object.
Args:
function: the function of the analyzer like count word, files...
isMetadata: boolean, if the result of the analyzer is stored in metadata (True) or in children(False)
"""
self.tokenizer = tokenizer
def volumetry(self, arrayText):
arrayResults =[]
for texts in arrayText:
text = self.tokenizer.tokenize(texts)
dicResults = {
"words" : len(text),
"uniqueWords" : len(set(text)),
"chars" : len(texts),
"avgWordsLen" : round(len(texts) / len(text))
}
arrayResults.append(dicResults)
return arrayResults
#The sequence must always have a text attribute (metadata) for this to work
#Count the number of words, number of unique words, number of characters and the average number of characters
def analyze(self,sequence,tag,levelOfAnalyzer,levelOfResult:Optional[str] = ""):
super().analyze(self.volumetry,sequence, tag, levelOfAnalyzer, levelOfResult, True)
'''children = [sequence.children]
ruta = levelOfAnalyze.split("/")
for r in ruta: #For each level of the path
for child in children: #Look through every available sequence
if r in child: #If r is inside the current sequence
if r == ruta[-1]:
for seq in child[r]:
if "text" not in seq.metadata:
raise ValueError(f"Level text not found in {seq.metadata.keys()}")
else:
text = seq.metadata["text"].split(" ")
volumetry= {
"words" : len(text),
"uniqueWords" : len(set(text)),
"chars" : len(seq.metadata["text"]),
"avgWordsLen" : round(volumetry["chars"] / volumetry["words"])
}
seq.metadata["volumetry"] = volumetry
else:
children = [c.children for c in child[r]]
else:
raise ValueError(f"Sequence level '{r}' not found in {child}")'''
\ No newline at end of file
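Putting it together, a hypothetical end-to-end run of one of the new analyzers; this assumes Sequence(label, text) keeps the raw string in metadata["text"] and that filterMetadata("text", fn) applies fn to it, neither of which is shown in this diff:

from textflow.Sequence import Sequence
from textflow.VolumetryAnalyzer import VolumetryAnalyzer  # assumed module path

seq = Sequence("doc", "uno dos tres uno")  # assumed constructor: label + raw text
VolumetryAnalyzer().analyze(seq, "volumetry", "text")
# seq.metadata["volumetry"] should now hold a list with one dict of counts
# ("words", "uniqueWords", "chars", "avgWordsLen") per analyzed text.
print(seq.metadata["volumetry"])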