Commit d19cd971 by Jaime Collado

Added classifiers

parent 227a17ae
@@ -37,51 +37,50 @@ def scrap(url):
 # ---------- HTTP METHODS ----------
-@app.post("/confiabilidad", response_model=schemas.OutputPrediction, tags=["Prediction"])
+@app.post("/confiabilidad", response_model=schemas.OutputPredictions, tags=["Prediction"])
 async def predict_reliability(
-    text: schemas.InputText
+    texts: schemas.InputTexts
 ):
-    """Predicts reliability based on a given input text.
+    """Predicts reliability based on news texts.
     Args:
-        text: String containing news.
+        texts: List of news texts in string format.
     Returns:
-        The reliability of the text."""
+        The reliability of the texts."""
     # TODO: Invoke the reliability model here and return the prediction.
-    return {"prediction": 0.0}
+    return {"predictions": [{"label": "Confiable", "score": 0.0}]}

-@app.post("/toxicidad", response_model=schemas.OutputPrediction, tags=["Prediction"])
+@app.post("/toxicidad", response_model=schemas.OutputPredictions, tags=["Prediction"])
 async def predict_toxicity(
-    text: schemas.InputText
+    texts: schemas.InputTexts
 ):
-    """Predicts toxicity based on a given input text.
+    """Predicts toxicity for the given input texts.
     Args:
-        text: String containing news.
+        texts: List of comments in string format.
     Returns:
-        The toxicity of the text."""
+        The texts' toxicity, each with a label and score."""
-    pred = classifiers.predict_toxicity(text.text)
-    print(pred, type(pred))
-    return pred[0]
+    pred = classifiers.predict_toxicity(texts)
+    return {"predictions": pred}

-@app.post("/constructividad", response_model=schemas.OutputPrediction, tags=["Prediction"])
+@app.post("/constructividad", response_model=schemas.OutputPredictions, tags=["Prediction"])
 async def predict_constructiveness(
-    text: schemas.InputText
+    texts: schemas.InputTexts
 ):
-    """Predicts constructiveness based on a given input text.
+    """Predicts constructiveness for the given input texts.
     Args:
-        text: String containing news.
+        texts: List of comments in string format.
     Returns:
-        The constructiveness of the text."""
+        The texts' constructiveness, each with a label and score."""
-    pred = classifiers.predict_constructiveness(text.text)
-    return pred[0]
+    pred = classifiers.predict_constructiveness(texts)
+    return {"predictions": pred}

 @app.post("/scrap", response_model=schemas.ScrapedComments, tags=["Scraper"])
 async def scrap_url(
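For reference, a minimal client-side sketch of how the reworked batch endpoints above can be called. The base URL, port, and sample texts are assumptions; the request and response shapes follow the InputTexts and OutputPredictions schemas changed in this commit.

import requests

# Hypothetical local deployment; adjust the base URL to the real one.
BASE_URL = "http://localhost:8000"

payload = {"texts": ["Primer comentario de prueba", "Segundo comentario"]}
resp = requests.post(f"{BASE_URL}/toxicidad", json=payload)
resp.raise_for_status()

# Expected response shape: {"predictions": [{"label": ..., "score": ...}, ...]}
for text, pred in zip(payload["texts"], resp.json()["predictions"]):
    print(f"{text!r} -> {pred['label']} ({pred['score']:.3f})")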
@@ -114,7 +113,7 @@ newspapers = {
     "abc": "https://www.abc.es/espana/madrid/detenidos-empleados-empresa-desokupa-antecedentes-coaccionar-armas-20231107040950-nt.html?ref=https%3A%2F%2Fwww.abc.es%2F",
     "elespanol": "https://www.elespanol.com/espana/20231108/crece-protesta-sede-psoe-disturbios-acaban-batalla-campal-heridos/808169177_0.html",
     "ntvespana": "https://ntvespana.com/26/10/2023/la-burbuja-de-josue-cardenas-entrevistas-a-pio-moa-y-ramon-peralta-por-josue-cardenas/",
-    # "theobjective": "https://theobjective.com/espana/politica/2023-11-08/costa-sanchez-consejo-europeo/",
+    "theobjective": "https://theobjective.com/espana/politica/2023-11-08/costa-sanchez-consejo-europeo/",
     "elperiodistadigital": "https://www.periodistadigital.com/periodismo/periodismo-online/20231108/ardio-ferraz-margenes-democracia-han-sobrepasado-video-689404954226/",
     "vozpopuli": "https://www.vozpopuli.com/espana/pedro-sanchez-concentraciones-sedes-socialistas.html",
     "eldebate": "https://www.eldebate.com/espana/20231108/el-ico-de-calvino-concedio-86400-a-la-galeria-de-arte-del-hijo-mayor-de-pujol-con-la-que-blanqueaba-comisiones_152023.html",
@@ -132,7 +131,10 @@ async def test_scrapers():
     mp.set_start_method("spawn")  # Fix for FastAPI process shutting down when all processes end. Seen here: https://github.com/tiangolo/fastapi/issues/1487
     with mp.Pool(processes=8) as pool:
-        test_results = pool.map(test_scraper, newspapers.values())
+        try:
+            test_results = pool.map(test_scraper, newspapers.values())
+        except:
+            print("Problem with one of the scrapers")
     scrapers_test = {newspaper: test_result for newspaper, test_result in zip(newspapers.keys(), test_results)}
......
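A note on the new try/except around pool.map above: with a bare except, test_results is never assigned when the call raises, so the dict comprehension that follows would fail with a NameError. Below is a small self-contained sketch of a more defensive variant; the test_scraper stub and newspapers entry are placeholders, not the project's real implementations.

import multiprocessing as mp

def test_scraper(url):
    # Placeholder standing in for the project's real scraper test.
    return url is not None

newspapers = {"example": "https://example.com/article"}  # placeholder dict

if __name__ == "__main__":
    mp.set_start_method("spawn")
    with mp.Pool(processes=2) as pool:
        try:
            test_results = pool.map(test_scraper, newspapers.values())
        except Exception as exc:
            print(f"Problem with one of the scrapers: {exc}")
            test_results = [None] * len(newspapers)  # keeps the zip() below well-defined
    scrapers_test = {name: result for name, result in zip(newspapers.keys(), test_results)}
    print(scrapers_test)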
+from transformers import pipeline
+
+def predict_toxicity(texts):
+    print(texts.texts)
+    pipe = pipeline("text-classification", model="rsepulvedat/Toxicity_model")
+    return pipe(texts.texts)
+
+def predict_constructiveness(texts):
+    pipe = pipeline("text-classification", model="rsepulvedat/Constructive_model")
+    return pipe(texts.texts)
\ No newline at end of file
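A possible refinement of the new classifiers module above, sketched under the assumption that reloading the Hugging Face pipeline on every request is unintended: pipeline() loads the model each time it is called, so caching one instance per model is usually cheaper. Function names and model IDs mirror the diff; the caching approach itself is not part of this commit.

from functools import lru_cache

from transformers import pipeline


@lru_cache(maxsize=None)
def _get_pipeline(model_name: str):
    # Build each text-classification pipeline once and reuse it across requests.
    return pipeline("text-classification", model=model_name)


def predict_toxicity(texts):
    return _get_pipeline("rsepulvedat/Toxicity_model")(texts.texts)


def predict_constructiveness(texts):
    return _get_pipeline("rsepulvedat/Constructive_model")(texts.texts)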
@@ -2,14 +2,13 @@ from pydantic import BaseModel, HttpUrl
 # ---------- DATA SCHEMAS ----------
-class InputText(BaseModel):
+class InputTexts(BaseModel):
     """Schema to define the input structure of the data to predict."""
-    text: str
+    texts: list[str]

-class OutputPrediction(BaseModel):
+class OutputPredictions(BaseModel):
     """Schema to define the output predictions' structure."""
-    label: str
-    score: float
+    predictions: list[dict]

 class InputURL(BaseModel):
     url: HttpUrl
......
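The reworked OutputPredictions schema types the payload as list[dict], which gives up the per-field validation the old label/score pair provided (as an aside, the built-in list[...] generics require Python 3.9+; on older interpreters typing.List would be needed). A hedged sketch of a stricter alternative, not the schema this commit introduces:

from pydantic import BaseModel


class Prediction(BaseModel):
    """A single classifier prediction."""
    label: str
    score: float


class OutputPredictions(BaseModel):
    """Schema to define the output predictions' structure."""
    predictions: list[Prediction]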
@@ -18,11 +18,13 @@ fastapi==0.89.1
 feedfinder2==0.0.4
 feedparser==6.0.10
 filelock==3.12.0
+fsspec==2023.12.2
 greenlet==2.0.2
 h11==0.14.0
 httpcore==0.16.3
 httptools==0.5.0
 httpx==0.23.3
+huggingface-hub==0.19.4
 idna==3.4
 itsdangerous==2.1.2
 jieba3k==0.35.1
@@ -31,17 +33,21 @@ joblib==1.2.0
 lockfile==0.12.2
 lxml==4.9.2
 MarkupSafe==2.1.2
+mpmath==1.3.0
+networkx==3.0
 newspaper3k==0.2.8
 nltk==3.8.1
 numpy==1.24.2
 orjson==3.8.5
 outcome==1.2.0
+packaging==23.2
 pandas==2.0.2
 passlib==1.7.4
 pid==3.0.4
 Pillow==9.5.0
 pyasn1==0.4.8
 pycparser==2.21
+pycryptodome==3.19.0
 pydantic==1.10.4
 PySocks==1.7.1
 python-daemon==3.0.1
@@ -56,6 +62,7 @@ requests==2.28.2
 requests-file==1.5.1
 rfc3986==1.5.0
 rsa==4.9
+safetensors==0.4.1
 scikit-learn==1.0.2
 scipy==1.10.0
 selenium==4.8.2
@@ -68,12 +75,19 @@ sortedcontainers==2.4.0
 soupsieve==2.4
 SQLAlchemy==2.0.2
 starlette==0.22.0
+sympy==1.12
 threadpoolctl==3.1.0
 tinysegmenter==0.3
 tldextract==3.4.4
+tokenizers==0.15.0
+torch==2.1.1+cu118
+torchaudio==2.1.1+cu118
+torchvision==0.16.1+cu118
 tqdm==4.65.0
+transformers==4.36.1
 trio==0.22.0
 trio-websocket==0.10.2
+triton==2.1.0
 typing_extensions==4.4.0
 tzdata==2023.3
 ujson==5.7.0
......
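Note on the new torch/torchaudio/torchvision pins: the +cu118 builds are not published on PyPI and are typically installed from the PyTorch wheel index (e.g. via pip's --extra-index-url https://download.pytorch.org/whl/cu118), so the requirements file alone may not be enough to reproduce the environment.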