Skip to content
Toggle navigation
P
Projects
G
Groups
S
Snippets
Help
Jaime Collado
/
socialfairness-api
This project
Loading...
Sign in
Toggle navigation
Go to a project
Project
Repository
Issues
0
Merge Requests
0
Pipelines
Wiki
Snippets
Settings
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Commit
eacdd3be
authored
Nov 08, 2023
by
Jaime Collado
Browse files
Options
_('Browse Files')
Download
Email Patches
Plain Diff
Scraper testing endpoint working
parent
7b90ae02
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
47 additions
and
7 deletions
app/api.py
app/main.py
app/schemas.py
app/api.py
View file @
eacdd3be
from
fastapi
import
FastAPI
,
HTTPException
,
status
from
fastapi
import
FastAPI
,
HTTPException
,
status
from
fastapi.middleware.cors
import
CORSMiddleware
from
fastapi.middleware.cors
import
CORSMiddleware
from
.
import
schemas
import
schemas
from
.
ScraperNoticias.globalScraper
import
GlobalScraper
from
ScraperNoticias.globalScraper
import
GlobalScraper
import
time
import
time
...
@@ -75,4 +75,40 @@ async def scrap_url(
...
@@ -75,4 +75,40 @@ async def scrap_url(
status_code
=
status
.
HTTP_404_NOT_FOUND
,
status_code
=
status
.
HTTP_404_NOT_FOUND
,
detail
=
"Comments not found"
detail
=
"Comments not found"
)
)
return
{
"comments"
:
scraped_comments
}
return
{
"comments"
:
scraped_comments
}
\ No newline at end of file
@app.get("/test-scraper", response_model=schemas.ScrapersTested, tags=["Scraper"])
async def test_scraper():
    """Tests whether the scrapers work or not.

    Every newspaper is probed with one fixed article URL. A scraper is
    reported as working only when it yields at least one comment. A scraper
    that raises is logged and reported as not working, so a single broken
    scraper cannot abort the whole check.

    Returns:
        A dictionary with the single key "scrapers", whose value maps the name
        of each newspaper to a boolean (True when comments were scraped).
    """
    import logging

    logger = logging.getLogger(__name__)

    # One sample article per supported newspaper.
    newspapers = {
        "elpais": "https://elpais.com/espana/2023-11-07/una-maniobra-judicial-que-amenaza-con-dejar-a-puigdemont-fuera-de-la-ley-de-amnistia.html",
        "okdiario": "https://okdiario.com/espana/policias-indignados-marlaska-cataluna-aguanto-6-dias-ferraz-cargo-20-minutos-11867826",
        "elmundo": "https://www.elmundo.es/internacional/2023/11/07/654ab70e21efa00b138b45cc.html",
        "20minutos": "https://www.20minutos.es/noticia/5188036/0/cancelan-cuenta-youtube-iker-jimenez-viva-libertad/",
        "elconfidencial": "https://www.elconfidencial.com/espana/2023-11-07/puigdemont-fiscal-tsunami-terrorismo-23j_3769680/",
        "marca": "https://www.marca.com/futbol/real-madrid/2023/11/08/654a7e8e22601d772a8b458a.html",
        "abc": "https://www.abc.es/espana/madrid/detenidos-empleados-empresa-desokupa-antecedentes-coaccionar-armas-20231107040950-nt.html?ref=https%3A%2F%2Fwww.abc.es%2F",
        "elespanol": "https://www.elespanol.com/espana/20231108/crece-protesta-sede-psoe-disturbios-acaban-batalla-campal-heridos/808169177_0.html",
        "ntvespana": "https://ntvespana.com/26/10/2023/la-burbuja-de-josue-cardenas-entrevistas-a-pio-moa-y-ramon-peralta-por-josue-cardenas/",
        "theobjective": "https://theobjective.com/espana/politica/2023-11-08/costa-sanchez-consejo-europeo/",
        "elperiodistadigital": "https://www.periodistadigital.com/periodismo/periodismo-online/20231108/ardio-ferraz-margenes-democracia-han-sobrepasado-video-689404954226/",
        "vozpopuli": "https://www.vozpopuli.com/espana/pedro-sanchez-concentraciones-sedes-socialistas.html",
        "eldebate": "https://www.eldebate.com/espana/20231108/el-ico-de-calvino-concedio-86400-a-la-galeria-de-arte-del-hijo-mayor-de-pujol-con-la-que-blanqueaba-comisiones_152023.html",
        "alertadigital": "https://www.alertadigital.com/2023/11/07/de-la-primavera-arabe-al-otono-espanol/",
    }

    # Start pessimistic: a scraper is "working" only once it proves it.
    test_result = dict.fromkeys(newspapers, False)

    # NOTE(review): process() is called synchronously inside an async handler;
    # if it performs blocking I/O it will stall the event loop — confirm and
    # consider offloading to a thread pool.
    for newspaper, url in newspapers.items():
        try:
            scraped_comments = GlobalScraper(url).process().comments.tolist()
        except Exception:
            # A failing scraper is precisely what this endpoint must report,
            # so record it as not working and keep testing the others.
            logger.exception("Scraper test failed for %s (%s)", newspaper, url)
            continue
        test_result[newspaper] = bool(scraped_comments)

    return {"scrapers": test_result}
\ No newline at end of file
app/main.py
View file @
eacdd3be
import
uvicorn
import
uvicorn
import
argparse
import
argparse
from
.
api
import
app
from
api
import
app
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
parser
=
argparse
.
ArgumentParser
()
parser
=
argparse
.
ArgumentParser
()
...
...
app/schemas.py
View file @
eacdd3be
...
@@ -15,4 +15,8 @@ class InputURL(BaseModel):
...
@@ -15,4 +15,8 @@ class InputURL(BaseModel):
class
ScrapedComments
(
BaseModel
):
class
ScrapedComments
(
BaseModel
):
"""Schema to define the output structure of the scraped comments."""
"""Schema to define the output structure of the scraped comments."""
comments
:
list
[
str
]
comments
:
list
[
str
]
\ No newline at end of file
class ScrapersTested(BaseModel):
    """Response schema for the scraper health check.

    Holds one entry per tested newspaper scraper.
    """

    # Maps a newspaper name to whether its scraper passed the test.
    scrapers: dict[str, bool]
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment