Creation of test.apply global method

parent 0bd3c574
This diff could not be displayed because it is too large.
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "Example.ipynb",
"provenance": [],
"collapsed_sections": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"source": [
"Import the necessary modules and packages"
],
"metadata": {
"id": "CxAzLreH8UfT"
}
},
"source": [
"Import the necessary modules and packages"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import os"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"id": "-OACG_k2-zh2",
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "-OACG_k2-zh2",
"outputId": "ed951cba-8b10-469d-f910-ed62987324c9"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[38;5;2m✔ Download and installation successful\u001b[0m\n",
"✔ Download and installation successful\n",
"You can now load the package via spacy.load('es_core_news_sm')\n"
]
}
......@@ -55,29 +48,27 @@
},
{
"cell_type": "markdown",
"source": [
"Create a Sequence from a directory:"
],
"metadata": {
"id": "_mnz9zmIZ93g"
}
},
"source": [
"Create a Sequence from a directory:"
]
},
{
"cell_type": "code",
"source": [
"s = SequenceDirectory(os.getcwd()+\"/ExampleDirectory\",[\"directories\",\"files\",\"words\"],[SequenceFile,SequenceString])"
],
"execution_count": 4,
"metadata": {
"id": "aW_DovfAZ8TL"
},
"execution_count": null,
"outputs": []
"outputs": [],
"source": [
"s = SequenceDirectory(os.getcwd()+\"/ExampleDirectory\",[\"directories\",\"files\",\"words\"],[SequenceFile,SequenceString])"
]
},
{
"cell_type": "code",
"source": [
"s"
],
"execution_count": 5,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
......@@ -85,21 +76,35 @@
"id": "l30ebggfl_rc",
"outputId": "8eb4cd20-04b7-4b64-b8a5-8daacd0772df"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"Sequence(\n",
" format: directory\n",
" metadata: {'nameFiles': ['Documento sin ttulo.txt'], 'directoriesPath': []}\n",
" metadata: {'nameFiles': ['Documento sin título copy.txt', 'Documento sin título.txt'], 'directoriesPath': []}\n",
" children: {'files': [Sequence(\n",
" format: file\n",
" metadata: {'text': '\\ufeffVeo que en este foro, afortunadamente para vosotros, no hay mucha gente que sufra de TOC.Si hay alguien por ahí, me gustaría que compartiérais vuestras opiniones, yo compruebo las cosas que hago porque tengo miedo de haberme equivocado y pienso en las consecuencias que ese error podría acarrearme, y las compruebo una y otra vez, y esto me angustia.\\nSé que abrí un post parecido hace tiempo, pero ya quedó abajo y por tanto en el olvido, por eso abro este por si alguna persona nueva con este problema lo lee.Me gustaría saber qué os recetan a vosotros para esto y si os va bien.\\n\\n\\nSaludos.\\nNereida.', 'nameFile': 'Documento sin título.txt'}\n",
" metadata: {'text': 'Hola, ¿Como estas?', 'nameFile': 'Documento sin título copy.txt'}\n",
" children: {'files': [Sequence(\n",
" format: string\n",
" metadata: {'text': '\\ufeffVeo'}\n",
" metadata: {'text': 'Hola,'}\n",
" children: {}\n",
"), Sequence(\n",
" format: string\n",
" metadata: {'text': '¿Como'}\n",
" children: {}\n",
"), Sequence(\n",
" format: string\n",
" metadata: {'text': 'estas?'}\n",
" children: {}\n",
")]}\n",
"), Sequence(\n",
" format: file\n",
" metadata: {'text': 'Veo que en este foro, afortunadamente para vosotros, no hay mucha gente que sufra de TOC.Si hay alguien por ahÃ\\xad, me gustarÃ\\xada que compartiérais vuestras opiniones, yo compruebo las cosas que hago porque tengo miedo de haberme equivocado y pienso en las consecuencias que ese error podrÃ\\xada acarrearme, y las compruebo una y otra vez, y esto me angustia.\\nSé que abrÃ\\xad un post parecido hace tiempo, pero ya quedó abajo y por tanto en el olvido, por eso abro este por si alguna persona nueva con este problema lo lee.Me gustarÃ\\xada saber qué os recetan a vosotros para esto y si os va bien.\\n\\n\\nSaludos.\\nNereida.', 'nameFile': 'Documento sin título.txt'}\n",
" children: {'files': [Sequence(\n",
" format: string\n",
" metadata: {'text': 'Veo'}\n",
" children: {}\n",
"), Sequence(\n",
" format: string\n",
......@@ -175,7 +180,7 @@
" children: {}\n",
"), Sequence(\n",
" format: string\n",
" metadata: {'text': 'ahí,'}\n",
" metadata: {'text': 'ahÃ\\xad,'}\n",
" children: {}\n",
"), Sequence(\n",
" format: string\n",
......@@ -183,7 +188,7 @@
" children: {}\n",
"), Sequence(\n",
" format: string\n",
" metadata: {'text': 'gustaría'}\n",
" metadata: {'text': 'gustarÃ\\xada'}\n",
" children: {}\n",
"), Sequence(\n",
" format: string\n",
......@@ -191,7 +196,7 @@
" children: {}\n",
"), Sequence(\n",
" format: string\n",
" metadata: {'text': 'compartiérais'}\n",
" metadata: {'text': 'compartiérais'}\n",
" children: {}\n",
"), Sequence(\n",
" format: string\n",
......@@ -283,7 +288,7 @@
" children: {}\n",
"), Sequence(\n",
" format: string\n",
" metadata: {'text': 'podría'}\n",
" metadata: {'text': 'podrÃ\\xada'}\n",
" children: {}\n",
"), Sequence(\n",
" format: string\n",
......@@ -335,7 +340,7 @@
" children: {}\n",
"), Sequence(\n",
" format: string\n",
" metadata: {'text': 'Sé'}\n",
" metadata: {'text': 'Sé'}\n",
" children: {}\n",
"), Sequence(\n",
" format: string\n",
......@@ -343,7 +348,7 @@
" children: {}\n",
"), Sequence(\n",
" format: string\n",
" metadata: {'text': 'abrí'}\n",
" metadata: {'text': 'abrÃ\\xad'}\n",
" children: {}\n",
"), Sequence(\n",
" format: string\n",
......@@ -375,7 +380,7 @@
" children: {}\n",
"), Sequence(\n",
" format: string\n",
" metadata: {'text': 'quedó'}\n",
" metadata: {'text': 'quedó'}\n",
" children: {}\n",
"), Sequence(\n",
" format: string\n",
......@@ -463,7 +468,7 @@
" children: {}\n",
"), Sequence(\n",
" format: string\n",
" metadata: {'text': 'gustaría'}\n",
" metadata: {'text': 'gustarÃ\\xada'}\n",
" children: {}\n",
"), Sequence(\n",
" format: string\n",
......@@ -471,7 +476,7 @@
" children: {}\n",
"), Sequence(\n",
" format: string\n",
" metadata: {'text': 'qué'}\n",
" metadata: {'text': 'qué'}\n",
" children: {}\n",
"), Sequence(\n",
" format: string\n",
......@@ -530,28 +535,28 @@
")"
]
},
"execution_count": 5,
"metadata": {},
"execution_count": 7
"output_type": "execute_result"
}
],
"source": [
"s"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "EYqoiVQBaIAq"
},
"source": [
"Create a POS analyzer and apply it to a sequence:\n",
"+ Store the result of the analyzer in the same level as the analysis level "
],
"metadata": {
"id": "EYqoiVQBaIAq"
}
]
},
{
"cell_type": "code",
"source": [
"posAnalyzer = POSAnalyzer()\n",
"posAnalyzer.analyze(s,\"POS\",\"files/text\")\n",
"s"
],
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
......@@ -559,10 +564,8 @@
"id": "OVYdTcDOn04r",
"outputId": "cf00a1a7-1fbf-42fd-ea58-04770d4f4dbc"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"Sequence(\n",
......@@ -1004,28 +1007,29 @@
")"
]
},
"execution_count": 8,
"metadata": {},
"execution_count": 8
"output_type": "execute_result"
}
],
"source": [
"posAnalyzer = POSAnalyzer()\n",
"posAnalyzer.analyze(s,\"POS\",\"files/text\")\n",
"s"
]
},
{
"cell_type": "markdown",
"source": [
"+ Store the result of the analyzer in higher levels than the analysis level"
],
"metadata": {
"id": "Y7TlOcGiAJcZ"
}
},
"source": [
"+ Store the result of the analyzer in higher levels than the analysis level"
]
},
{
"cell_type": "code",
"source": [
"s = SequenceDirectory(os.getcwd()+\"/ExampleDirectory\")\n",
"posAnalyzer = POSAnalyzer()\n",
"posAnalyzer.analyze(s,\"POS\",\"text\",\"files\")\n",
"s"
],
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
......@@ -1033,10 +1037,8 @@
"id": "1fllpdvltmHH",
"outputId": "d4505c18-843d-44ce-cabb-4d5ff23d5187"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"Sequence(\n",
......@@ -1478,42 +1480,54 @@
")"
]
},
"execution_count": 9,
"metadata": {},
"execution_count": 9
"output_type": "execute_result"
}
],
"source": [
"s = SequenceDirectory(os.getcwd()+\"/ExampleDirectory\")\n",
"posAnalyzer = POSAnalyzer()\n",
"posAnalyzer.analyze(s,\"POS\",\"text\",\"files\")\n",
"s"
]
},
{
"cell_type": "markdown",
"source": [
"Use of the filter method of a sequence:"
],
"metadata": {
"id": "0YK8UKkIToJm"
}
},
"source": [
"Use of the filter method of a sequence:"
]
},
{
"cell_type": "markdown",
"source": [
" Criteria function"
],
"metadata": {
"id": "DHbRPI8MUANE"
}
},
"source": [
" Criteria function"
]
},
{
"cell_type": "code",
"source": [
"s = SequenceDirectory(os.getcwd()+\"/ExampleDirectory\")"
],
"execution_count": null,
"metadata": {
"id": "bkytecNImwv3"
},
"execution_count": null,
"outputs": []
"outputs": [],
"source": [
"s = SequenceDirectory(os.getcwd()+\"/ExampleDirectory\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "N3dlbFKCT_KQ"
},
"outputs": [],
"source": [
"def sequenciasMenores4letras(arraySequencias):\n",
" result=[]\n",
......@@ -1521,41 +1535,31 @@
" if len(i.metadata[\"text\"]) < 4:\n",
" result.append(i)\n",
" return result"
],
"metadata": {
"id": "N3dlbFKCT_KQ"
},
"execution_count": null,
"outputs": []
]
},
{
"cell_type": "markdown",
"source": [
"Using de filter"
],
"metadata": {
"id": "ruj6I_hK7MzL"
}
},
"source": [
"Using de filter"
]
},
{
"cell_type": "code",
"source": [
"for i in s.filter(\"files/files\",sequenciasMenores4letras):\n",
" print(i)\n",
" print(type(i))"
],
"execution_count": null,
"metadata": {
"id": "QwHAoIERVHOn",
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "QwHAoIERVHOn",
"outputId": "0c978552-2783-42a2-cb76-063e02cd011c"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"output_type": "stream",
"text": [
"que\n",
"<class 'textflow.SequenceString.SequenceString'>\n",
......@@ -1651,19 +1655,29 @@
"<class 'textflow.SequenceString.SequenceString'>\n"
]
}
],
"source": [
"for i in s.filter(\"files/files\",sequenciasMenores4letras):\n",
" print(i)\n",
" print(type(i))"
]
},
{
"cell_type": "markdown",
"source": [
"Creating a new criteria function for use in filtermetada function:"
],
"metadata": {
"id": "cWCfr8xV7l3Y"
}
},
"source": [
"Creating a new criteria function for use in filtermetada function:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "Mol4alQq7elK"
},
"outputs": [],
"source": [
"def sequenciasMenores4letrasMetadata(arraySequencias):\n",
" result=[]\n",
......@@ -1671,32 +1685,22 @@
" if len(i) < 4:\n",
" result.append(i)\n",
" return result"
],
"metadata": {
"id": "Mol4alQq7elK"
},
"execution_count": null,
"outputs": []
]
},
{
"cell_type": "code",
"source": [
"for i in s.filterMetadata(\"files/files/text\", sequenciasMenores4letrasMetadata):\n",
" print(i)\n",
" print(type(i))"
],
"execution_count": null,
"metadata": {
"id": "wYs7VwmRTxnL",
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "wYs7VwmRTxnL",
"outputId": "536f4b80-6a0e-4092-ba38-7798c90b9c8c"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"output_type": "stream",
"text": [
"que\n",
"<class 'str'>\n",
......@@ -1792,7 +1796,37 @@
"<class 'str'>\n"
]
}
],
"source": [
"for i in s.filterMetadata(\"files/files/text\", sequenciasMenores4letrasMetadata):\n",
" print(i)\n",
" print(type(i))"
]
}
]
}
\ No newline at end of file
],
"metadata": {
"colab": {
"collapsed_sections": [],
"name": "Example.ipynb",
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.11"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
......@@ -15,7 +15,23 @@ class Test():
self.alpha = alpha
def apply(self, df1, df2, criteriaColumn1, criteriaColumn2, visualizer=None):
    """Run the normality, parametric and non-parametric tests in one call.

    The two frames are concatenated column-wise and passed to
    ``self.applyNormalTest``. The features collected from that result are
    forwarded to ``self.applyParametricTest``; every column of the combined
    frame whose dtype is not ``'object'`` is forwarded to
    ``self.applyNonParametricTest``.

    Args:
        df1: First DataFrame.
        df2: Second DataFrame.
        criteriaColumn1: Passed through unchanged to the parametric and
            non-parametric test methods (third positional argument).
        criteriaColumn2: Passed through unchanged to the parametric and
            non-parametric test methods (fourth positional argument).
        visualizer: Accepted but currently unused by this method.

    Returns:
        dict: The three results under the keys ``"normalTest"``,
        ``"parametricTest"`` and ``"nonParametricTes"``.
    """
    df = pd.concat([df1, df2], axis=1)
    numeric_cols = [col for col, dtype in zip(df.columns, df.dtypes) if dtype != 'object']

    print("---------------------------------------NORMALITY TEST---------------------------------------")
    normal_results = self.applyNormalTest(df)
    normal_features = set()
    # NOTE(review): the loop iterates over normal_results[1] but indexes
    # normal_results[key]; confirm against the return shape of
    # applyNormalTest that this is not meant to be normal_results[1][key].
    for key in normal_results[1]:
        normal_features |= set(normal_results[key])

    print("---------------------------------------PARAMETRIC TEST---------------------------------------")
    parametricResults = self.applyParametricTest(
        df1, df2, criteriaColumn1, criteriaColumn2, normal_features
    )

    print("---------------------------------------NON-PARAMETRIC TEST---------------------------------------")
    nonParametricResults = self.applyNonParametricTest(
        df1, df2, criteriaColumn1, criteriaColumn2, numeric_cols
    )

    # NOTE(review): "nonParametricTes" looks like a typo for
    # "nonParametricTest"; kept as-is because callers may already rely on it.
    return {
        "normalTest": normal_results,
        "parametricTest": parametricResults,
        "nonParametricTes": nonParametricResults,
    }

# TODO: add plots:
#   - quartile-quartile plot (presumably a Q-Q plot; confirm)
#   - box plot
......
......@@ -87,6 +87,10 @@ class Visualization():
plt.show()
pass
def show_wordCloud(self, df, textColumns, groupby=None):
    """Placeholder for a word-cloud plot; not implemented yet.

    Both branches are currently no-ops, so the method always returns
    ``None`` and ignores ``df`` and ``groupby``.

    Args:
        df: Currently unused.
        textColumns: Either a single ``str`` or some other value; the method
            only distinguishes the two cases and does nothing in either.
        groupby: Currently unused.
    """
    if isinstance(textColumns, str):
        # TODO: handle the case where textColumns is a single string.
        pass
    else:
        # TODO: handle the case where textColumns is not a string.
        pass
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment