Commit 085369f4 by Jaime Collado

Testing runs in parallel to make it faster

parent eacdd3be
Showing with 29 additions and 11 deletions
from multiprocessing import Pool
from fastapi import FastAPI, HTTPException, status from fastapi import FastAPI, HTTPException, status
from fastapi.middleware.cors import CORSMiddleware from fastapi.middleware.cors import CORSMiddleware
...@@ -20,6 +22,21 @@ app.add_middleware( ...@@ -20,6 +22,21 @@ app.add_middleware(
allow_headers=["*"], allow_headers=["*"],
) )
# ---------- UTILS ----------
def test_scraper(url):
    """Check whether scraping *url* yields any comments.

    Args:
        url: Address of the page to scrape.

    Returns:
        bool: True if at least one comment was scraped, False otherwise.
    """
    # bool() of the scraped list expresses exactly the truthiness the
    # original if/return-True/return-False ladder encoded.
    return bool(scrap(url))
def scrap(url):
    """Scrape *url* and return the comments found on the page.

    Args:
        url: Address of the page to scrape.

    Returns:
        list: Comments extracted from the page (may be empty).
    """
    processed = GlobalScraper(url).process()
    return processed.comments.tolist()
# ---------- HTTP METHODS ---------- # ---------- HTTP METHODS ----------
@app.post("/confiabilidad", response_model=schemas.OutputPrediction, tags=["Prediction"]) @app.post("/confiabilidad", response_model=schemas.OutputPrediction, tags=["Prediction"])
async def predict_reliability( async def predict_reliability(
...@@ -67,9 +84,7 @@ async def scrap_url( ...@@ -67,9 +84,7 @@ async def scrap_url(
time.sleep(10) time.sleep(10)
print("Después del sleep") print("Después del sleep")
scraper = GlobalScraper(url.url) scraped_comments = scrap(url.url)
df = scraper.process()
scraped_comments = df.comments.tolist()
if not scraped_comments: if not scraped_comments:
raise HTTPException( raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND, status_code=status.HTTP_404_NOT_FOUND,
...@@ -78,7 +93,7 @@ async def scrap_url( ...@@ -78,7 +93,7 @@ async def scrap_url(
return {"comments": scraped_comments} return {"comments": scraped_comments}
@app.get("/test-scraper", response_model=schemas.ScrapersTested, tags=["Scraper"]) @app.get("/test-scraper", response_model=schemas.ScrapersTested, tags=["Scraper"])
async def test_scraper(): async def test_scrapers():
"""Tests whether the scrapers work or not. """Tests whether the scrapers work or not.
Returns: Returns:
...@@ -104,11 +119,15 @@ async def test_scraper(): ...@@ -104,11 +119,15 @@ async def test_scraper():
test_result = {k: False for k, _ in newspapers.items()} test_result = {k: False for k, _ in newspapers.items()}
pool = Pool(processes=14)
for newspaper, url in newspapers.items(): for newspaper, url in newspapers.items():
scraper = GlobalScraper(url) test_result[newspaper] = pool.apply_async(test_scraper, [url])
df = scraper.process()
scraped_comments = df.comments.tolist() pool.close()
if scraped_comments: pool.join()
test_result[newspaper] = True
final = {k: v.get() for k, v in test_result.items()}
return {"scrapers": final}
return {"scrapers": test_result}
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment