Creation of test.apply global method

6fbbd89e · Estrella Vallecillo · 0bd3c574 · 6fbbd89e · 6fbbd89e · 6fbbd89e
Commit 6fbbd89e authored Oct 20, 2023 by Estrella Vallecillo
Showing with 185 additions and 131 deletions
Examples/Example copy.ipynb
Examples/Example.ipynb
textflow/Test.py
textflow/Visualization.py
--- a/Examples/Example copy.ipynb
+++ b/Examples/Example copy.ipynb
--- a/Examples/Example.ipynb
+++ b/Examples/Example.ipynb
 {
-  "nbformat": 4,
-  "nbformat_minor": 0,
-  "metadata": {
-    "colab": {
-      "name": "Example.ipynb",
-      "provenance": [],
-      "collapsed_sections": []
-    },
-    "kernelspec": {
-      "name": "python3",
-      "display_name": "Python 3"
-    },
-    "language_info": {
-      "name": "python"
-    }
-  },
  "cells": [
    {
      "cell_type": "markdown",
-      "source": [
-        "Import the necessary modules and packages"
-      ],
      "metadata": {
        "id": "CxAzLreH8UfT"
-      }
+      },
+      "source": [
+        "Import the necessary modules and packages"
+      ]
    },
    {
      "cell_type": "code",
-      "execution_count": null,
+      "execution_count": 3,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import os"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 1,
      "metadata": {
-        "id": "-OACG_k2-zh2",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
+        "id": "-OACG_k2-zh2",
        "outputId": "ed951cba-8b10-469d-f910-ed62987324c9"
      },
      "outputs": [
        {
-          "output_type": "stream",
          "name": "stdout",
+          "output_type": "stream",
          "text": [
-            "\u001b[38;5;2m✔ Download and installation successful\u001b[0m\n",
+            "✔ Download and installation successful\n",
            "You can now load the package via spacy.load('es_core_news_sm')\n"
          ]
        }
@@ -55,29 +48,27 @@
    },
    {
      "cell_type": "markdown",
-      "source": [
-        "Create a Sequence from a directory:"
-      ],
      "metadata": {
        "id": "_mnz9zmIZ93g"
-      }
+      },
+      "source": [
+        "Create a Sequence from a directory:"
+      ]
    },
    {
      "cell_type": "code",
-      "source": [
-        "s = SequenceDirectory(os.getcwd()+\"/ExampleDirectory\",[\"directories\",\"files\",\"words\"],[SequenceFile,SequenceString])"
-      ],
+      "execution_count": 4,
      "metadata": {
        "id": "aW_DovfAZ8TL"
      },
-      "execution_count": null,
-      "outputs": []
+      "outputs": [],
+      "source": [
+        "s = SequenceDirectory(os.getcwd()+\"/ExampleDirectory\",[\"directories\",\"files\",\"words\"],[SequenceFile,SequenceString])"
+      ]
    },
    {
      "cell_type": "code",
-      "source": [
-        "s"
-      ],
+      "execution_count": 5,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
@@ -85,21 +76,35 @@
        "id": "l30ebggfl_rc",
        "outputId": "8eb4cd20-04b7-4b64-b8a5-8daacd0772df"
      },
-      "execution_count": null,
      "outputs": [
        {
-          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "Sequence(\n",
              "  format: directory\n",
-              "  metadata: {'nameFiles': ['Documento sin título.txt'], 'directoriesPath': []}\n",
+              "  metadata: {'nameFiles': ['Documento sin título copy.txt', 'Documento sin título.txt'], 'directoriesPath': []}\n",
              "  children: {'files': [Sequence(\n",
              "  format: file\n",
-              "  metadata: {'text': '\\ufeffVeo que en este foro, afortunadamente para vosotros, no hay mucha gente que sufra de TOC.Si hay alguien por ahí, me gustaría que compartiérais vuestras opiniones, yo compruebo las cosas que hago porque tengo miedo de haberme equivocado y pienso en las consecuencias que ese error podría acarrearme, y las compruebo una y otra vez, y esto me angustia.\\nSé que abrí un post parecido hace tiempo, pero ya quedó abajo y por tanto en el olvido, por eso abro este por si alguna persona nueva con este problema lo lee.Me gustaría saber qué os recetan a vosotros para esto y si os va bien.\\n\\n\\nSaludos.\\nNereida.', 'nameFile': 'Documento sin título.txt'}\n",
+              "  metadata: {'text': 'ï»¿Hola, Â¿Como estas?', 'nameFile': 'Documento sin título copy.txt'}\n",
              "  children: {'files': [Sequence(\n",
              "  format: string\n",
-              "  metadata: {'text': '\\ufeffVeo'}\n",
+              "  metadata: {'text': 'ï»¿Hola,'}\n",
+              "  children: {}\n",
+              "), Sequence(\n",
+              "  format: string\n",
+              "  metadata: {'text': 'Â¿Como'}\n",
+              "  children: {}\n",
+              "), Sequence(\n",
+              "  format: string\n",
+              "  metadata: {'text': 'estas?'}\n",
+              "  children: {}\n",
+              ")]}\n",
+              "), Sequence(\n",
+              "  format: file\n",
+              "  metadata: {'text': 'ï»¿Veo que en este foro, afortunadamente para vosotros, no hay mucha gente que sufra de TOC.Si hay alguien por ahÃ\\xad, me gustarÃ\\xada que compartiÃ©rais vuestras opiniones, yo compruebo las cosas que hago porque tengo miedo de haberme equivocado y pienso en las consecuencias que ese error podrÃ\\xada acarrearme, y las compruebo una y otra vez, y esto me angustia.\\nSÃ© que abrÃ\\xad un post parecido hace tiempo, pero ya quedÃ³ abajo y por tanto en el olvido, por eso abro este por si alguna persona nueva con este problema lo lee.Me gustarÃ\\xada saber quÃ© os recetan a vosotros para esto y si os va bien.\\n\\n\\nSaludos.\\nNereida.', 'nameFile': 'Documento sin título.txt'}\n",
+              "  children: {'files': [Sequence(\n",
+              "  format: string\n",
+              "  metadata: {'text': 'ï»¿Veo'}\n",
              "  children: {}\n",
              "), Sequence(\n",
              "  format: string\n",
@@ -175,7 +180,7 @@
              "  children: {}\n",
              "), Sequence(\n",
              "  format: string\n",
-              "  metadata: {'text': 'ahí,'}\n",
+              "  metadata: {'text': 'ahÃ\\xad,'}\n",
              "  children: {}\n",
              "), Sequence(\n",
              "  format: string\n",
@@ -183,7 +188,7 @@
              "  children: {}\n",
              "), Sequence(\n",
              "  format: string\n",
-              "  metadata: {'text': 'gustaría'}\n",
+              "  metadata: {'text': 'gustarÃ\\xada'}\n",
              "  children: {}\n",
              "), Sequence(\n",
              "  format: string\n",
@@ -191,7 +196,7 @@
              "  children: {}\n",
              "), Sequence(\n",
              "  format: string\n",
-              "  metadata: {'text': 'compartiérais'}\n",
+              "  metadata: {'text': 'compartiÃ©rais'}\n",
              "  children: {}\n",
              "), Sequence(\n",
              "  format: string\n",
@@ -283,7 +288,7 @@
              "  children: {}\n",
              "), Sequence(\n",
              "  format: string\n",
-              "  metadata: {'text': 'podría'}\n",
+              "  metadata: {'text': 'podrÃ\\xada'}\n",
              "  children: {}\n",
              "), Sequence(\n",
              "  format: string\n",
@@ -335,7 +340,7 @@
              "  children: {}\n",
              "), Sequence(\n",
              "  format: string\n",
-              "  metadata: {'text': 'Sé'}\n",
+              "  metadata: {'text': 'SÃ©'}\n",
              "  children: {}\n",
              "), Sequence(\n",
              "  format: string\n",
@@ -343,7 +348,7 @@
              "  children: {}\n",
              "), Sequence(\n",
              "  format: string\n",
-              "  metadata: {'text': 'abrí'}\n",
+              "  metadata: {'text': 'abrÃ\\xad'}\n",
              "  children: {}\n",
              "), Sequence(\n",
              "  format: string\n",
@@ -375,7 +380,7 @@
              "  children: {}\n",
              "), Sequence(\n",
              "  format: string\n",
-              "  metadata: {'text': 'quedó'}\n",
+              "  metadata: {'text': 'quedÃ³'}\n",
              "  children: {}\n",
              "), Sequence(\n",
              "  format: string\n",
@@ -463,7 +468,7 @@
              "  children: {}\n",
              "), Sequence(\n",
              "  format: string\n",
-              "  metadata: {'text': 'gustaría'}\n",
+              "  metadata: {'text': 'gustarÃ\\xada'}\n",
              "  children: {}\n",
              "), Sequence(\n",
              "  format: string\n",
@@ -471,7 +476,7 @@
              "  children: {}\n",
              "), Sequence(\n",
              "  format: string\n",
-              "  metadata: {'text': 'qué'}\n",
+              "  metadata: {'text': 'quÃ©'}\n",
              "  children: {}\n",
              "), Sequence(\n",
              "  format: string\n",
@@ -530,28 +535,28 @@
              ")"
            ]
          },
+          "execution_count": 5,
          "metadata": {},
-          "execution_count": 7
+          "output_type": "execute_result"
        }
+      ],
+      "source": [
+        "s"
      ]
    },
    {
      "cell_type": "markdown",
+      "metadata": {
+        "id": "EYqoiVQBaIAq"
+      },
      "source": [
        "Create a POS analyzer and apply it to a sequence:\n",
        "+ Store the result of the analyzer in the same level as the analysis level "
-      ],
-      "metadata": {
-        "id": "EYqoiVQBaIAq"
-      }
+      ]
    },
    {
      "cell_type": "code",
-      "source": [
-        "posAnalyzer = POSAnalyzer()\n",
-        "posAnalyzer.analyze(s,\"POS\",\"files/text\")\n",
-        "s"
-      ],
+      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
@@ -559,10 +564,8 @@
        "id": "OVYdTcDOn04r",
        "outputId": "cf00a1a7-1fbf-42fd-ea58-04770d4f4dbc"
      },
-      "execution_count": null,
      "outputs": [
        {
-          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "Sequence(\n",
@@ -1004,28 +1007,29 @@
              ")"
            ]
          },
+          "execution_count": 8,
          "metadata": {},
-          "execution_count": 8
+          "output_type": "execute_result"
        }
+      ],
+      "source": [
+        "posAnalyzer = POSAnalyzer()\n",
+        "posAnalyzer.analyze(s,\"POS\",\"files/text\")\n",
+        "s"
      ]
    },
    {
      "cell_type": "markdown",
-      "source": [
-        "+ Store the result of the analyzer in higher levels than the analysis level"
-      ],
      "metadata": {
        "id": "Y7TlOcGiAJcZ"
-      }
+      },
+      "source": [
+        "+ Store the result of the analyzer in higher levels than the analysis level"
+      ]
    },
    {
      "cell_type": "code",
-      "source": [
-        "s = SequenceDirectory(os.getcwd()+\"/ExampleDirectory\")\n",
-        "posAnalyzer = POSAnalyzer()\n",
-        "posAnalyzer.analyze(s,\"POS\",\"text\",\"files\")\n",
-        "s"
-      ],
+      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
@@ -1033,10 +1037,8 @@
        "id": "1fllpdvltmHH",
        "outputId": "d4505c18-843d-44ce-cabb-4d5ff23d5187"
      },
-      "execution_count": null,
      "outputs": [
        {
-          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "Sequence(\n",
@@ -1478,42 +1480,54 @@
              ")"
            ]
          },
+          "execution_count": 9,
          "metadata": {},
-          "execution_count": 9
+          "output_type": "execute_result"
        }
+      ],
+      "source": [
+        "s = SequenceDirectory(os.getcwd()+\"/ExampleDirectory\")\n",
+        "posAnalyzer = POSAnalyzer()\n",
+        "posAnalyzer.analyze(s,\"POS\",\"text\",\"files\")\n",
+        "s"
      ]
    },
    {
      "cell_type": "markdown",
-      "source": [
-        "Use of the filter method of a sequence:"
-      ],
      "metadata": {
        "id": "0YK8UKkIToJm"
-      }
+      },
+      "source": [
+        "Use of the filter method of a sequence:"
+      ]
    },
    {
      "cell_type": "markdown",
-      "source": [
-        "  Criteria function"
-      ],
      "metadata": {
        "id": "DHbRPI8MUANE"
-      }
+      },
+      "source": [
+        "  Criteria function"
+      ]
    },
    {
      "cell_type": "code",
-      "source": [
-        "s = SequenceDirectory(os.getcwd()+\"/ExampleDirectory\")"
-      ],
+      "execution_count": null,
      "metadata": {
        "id": "bkytecNImwv3"
      },
-      "execution_count": null,
-      "outputs": []
+      "outputs": [],
+      "source": [
+        "s = SequenceDirectory(os.getcwd()+\"/ExampleDirectory\")"
+      ]
    },
    {
      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "N3dlbFKCT_KQ"
+      },
+      "outputs": [],
      "source": [
        "def sequenciasMenores4letras(arraySequencias):\n",
        "  result=[]\n",
@@ -1521,41 +1535,31 @@
        "    if len(i.metadata[\"text\"]) < 4:\n",
        "      result.append(i)\n",
        "  return result"
-      ],
-      "metadata": {
-        "id": "N3dlbFKCT_KQ"
-      },
-      "execution_count": null,
-      "outputs": []
+      ]
    },
    {
      "cell_type": "markdown",
-      "source": [
-        "Using the filter"
-      ],
      "metadata": {
        "id": "ruj6I_hK7MzL"
-      }
+      },
+      "source": [
+        "Using the filter"
+      ]
    },
    {
      "cell_type": "code",
-      "source": [
-        "for i in s.filter(\"files/files\",sequenciasMenores4letras):\n",
-        "  print(i)\n",
-        "  print(type(i))"
-      ],
+      "execution_count": null,
      "metadata": {
-        "id": "QwHAoIERVHOn",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
+        "id": "QwHAoIERVHOn",
        "outputId": "0c978552-2783-42a2-cb76-063e02cd011c"
      },
-      "execution_count": null,
      "outputs": [
        {
-          "output_type": "stream",
          "name": "stdout",
+          "output_type": "stream",
          "text": [
            "que\n",
            "<class 'textflow.SequenceString.SequenceString'>\n",
@@ -1651,19 +1655,29 @@
            "<class 'textflow.SequenceString.SequenceString'>\n"
          ]
        }
+      ],
+      "source": [
+        "for i in s.filter(\"files/files\",sequenciasMenores4letras):\n",
+        "  print(i)\n",
+        "  print(type(i))"
      ]
    },
    {
      "cell_type": "markdown",
-      "source": [
-        "Creating a new criteria function for use in the filterMetadata function:"
-      ],
      "metadata": {
        "id": "cWCfr8xV7l3Y"
-      }
+      },
+      "source": [
+        "Creating a new criteria function for use in the filterMetadata function:"
+      ]
    },
    {
      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "Mol4alQq7elK"
+      },
+      "outputs": [],
      "source": [
        "def sequenciasMenores4letrasMetadata(arraySequencias):\n",
        "  result=[]\n",
@@ -1671,32 +1685,22 @@
        "    if len(i) < 4:\n",
        "      result.append(i)\n",
        "  return result"
-      ],
-      "metadata": {
-        "id": "Mol4alQq7elK"
-      },
-      "execution_count": null,
-      "outputs": []
+      ]
    },
    {
      "cell_type": "code",
-      "source": [
-        "for i in s.filterMetadata(\"files/files/text\", sequenciasMenores4letrasMetadata):\n",
-        "  print(i)\n",
-        "  print(type(i))"
-      ],
+      "execution_count": null,
      "metadata": {
-        "id": "wYs7VwmRTxnL",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
+        "id": "wYs7VwmRTxnL",
        "outputId": "536f4b80-6a0e-4092-ba38-7798c90b9c8c"
      },
-      "execution_count": null,
      "outputs": [
        {
-          "output_type": "stream",
          "name": "stdout",
+          "output_type": "stream",
          "text": [
            "que\n",
            "<class 'str'>\n",
@@ -1792,7 +1796,37 @@
            "<class 'str'>\n"
          ]
        }
+      ],
+      "source": [
+        "for i in s.filterMetadata(\"files/files/text\", sequenciasMenores4letrasMetadata):\n",
+        "  print(i)\n",
+        "  print(type(i))"
      ]
    }
-  ]
-}
\ No newline at end of file
+  ],
+  "metadata": {
+    "colab": {
+      "collapsed_sections": [],
+      "name": "Example.ipynb",
+      "provenance": []
+    },
+    "kernelspec": {
+      "display_name": "Python 3",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.10.11"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}
--- a/textflow/Test.py
+++ b/textflow/Test.py
@@ -15,7 +15,23 @@ class Test():
        self.alpha = alpha
        

-    def apply(self,df1,df2):
+    def apply(self,df1,df2,criteriaColumn1,criteriaColumn2, visualizer = None):
+        df = pd.concat([df1,df2], axis=1)
+        numeric_cols = [col for col, dtype in zip(df.columns, df.dtypes) if dtype != 'object']
+
+        print("---------------------------------------NORMALITY TEST---------------------------------------")
+        normal_results =self.applyNormalTest(df)
+        normal_features= set()
+        for key in normal_results[1]:
+            normal_features= normal_features | set(normal_results[key])
+        print("---------------------------------------PARAMETRIC TEST---------------------------------------")
+        parametricResults = self.applyParametricTest(df1, df2, criteriaColumn1,criteriaColumn2, normal_features)
+        print("---------------------------------------NON-PARAMETRIC TEST---------------------------------------")    
+        nonParametricResults = self.applyNonParametricTest(df1, df2, criteriaColumn1,criteriaColumn2, numeric_cols)
+        dicResults = {"normalTest":normal_results,"parametricTest":parametricResults,"nonParametricTes":nonParametricResults}
+        return dicResults
+
+
        #TODO: plots still need to be added:
        #   qUARTIL QUARTIL
        #   Box Plot

--- a/textflow/Visualization.py
+++ b/textflow/Visualization.py
@@ -87,6 +87,10 @@ class Visualization():
        plt.show()
        pass

-    def show_wordCloud(self):
-        pass
+    def show_wordCloud(self,df,textColumns, groupby=None):
+        if type(textColumns) == str:
+            pass
+        else:
+        
+            pass
    
\ No newline at end of file