Skip to content
Toggle navigation
P
Projects
G
Groups
S
Snippets
Help
Jaime Collado
/
socialfairness-api
This project
Loading...
Sign in
Toggle navigation
Go to a project
Project
Repository
Issues
0
Merge Requests
0
Pipelines
Wiki
Snippets
Settings
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Commit
db15d94e
authored
Nov 09, 2023
by
Jaime Collado
Browse files
Options
_('Browse Files')
Download
Email Patches
Plain Diff
Minor fixes
parent
7dadb6b5
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
18 additions
and
34 deletions
app/api.py
app/api.py
View file @
db15d94e
...
@@ -86,6 +86,24 @@ async def scrap_url(
...
@@ -86,6 +86,24 @@ async def scrap_url(
)
)
return
{
"comments"
:
scraped_comments
}
return
{
"comments"
:
scraped_comments
}
newspapers
=
{
"elpais"
:
"https://elpais.com/espana/2023-11-07/una-maniobra-judicial-que-amenaza-con-dejar-a-puigdemont-fuera-de-la-ley-de-amnistia.html"
,
"okdiario"
:
"https://okdiario.com/espana/policias-indignados-marlaska-cataluna-aguanto-6-dias-ferraz-cargo-20-minutos-11867826"
,
"elmundo"
:
"https://www.elmundo.es/internacional/2023/11/07/654ab70e21efa00b138b45cc.html"
,
# "20minutos": "https://www.20minutos.es/noticia/5188036/0/cancelan-cuenta-youtube-iker-jimenez-viva-libertad/",
"elconfidencial"
:
"https://www.elconfidencial.com/espana/2023-11-07/puigdemont-fiscal-tsunami-terrorismo-23j_3769680/"
,
"marca"
:
"https://www.marca.com/futbol/real-madrid/2023/11/08/654a7e8e22601d772a8b458a.html"
,
"abc"
:
"https://www.abc.es/espana/madrid/detenidos-empleados-empresa-desokupa-antecedentes-coaccionar-armas-20231107040950-nt.html?ref=https
%3
A
%2
F
%2
Fwww.abc.es
%2
F"
,
"elespanol"
:
"https://www.elespanol.com/espana/20231108/crece-protesta-sede-psoe-disturbios-acaban-batalla-campal-heridos/808169177_0.html"
,
"ntvespana"
:
"https://ntvespana.com/26/10/2023/la-burbuja-de-josue-cardenas-entrevistas-a-pio-moa-y-ramon-peralta-por-josue-cardenas/"
,
# "theobjective": "https://theobjective.com/espana/politica/2023-11-08/costa-sanchez-consejo-europeo/",
"elperiodistadigital"
:
"https://www.periodistadigital.com/periodismo/periodismo-online/20231108/ardio-ferraz-margenes-democracia-han-sobrepasado-video-689404954226/"
,
"vozpopuli"
:
"https://www.vozpopuli.com/espana/pedro-sanchez-concentraciones-sedes-socialistas.html"
,
"eldebate"
:
"https://www.eldebate.com/espana/20231108/el-ico-de-calvino-concedio-86400-a-la-galeria-de-arte-del-hijo-mayor-de-pujol-con-la-que-blanqueaba-comisiones_152023.html"
,
"alertadigital"
:
"https://www.alertadigital.com/2023/11/07/de-la-primavera-arabe-al-otono-espanol/"
}
@app.get
(
"/test-scrapers"
,
response_model
=
schemas
.
ScrapersTested
,
tags
=
[
"Scraper"
])
@app.get
(
"/test-scrapers"
,
response_model
=
schemas
.
ScrapersTested
,
tags
=
[
"Scraper"
])
async
def
test_scrapers
():
async
def
test_scrapers
():
"""Tests whether the scrapers work or not.
"""Tests whether the scrapers work or not.
...
@@ -95,23 +113,6 @@ async def test_scrapers():
...
@@ -95,23 +113,6 @@ async def test_scrapers():
"""
"""
start
=
time
.
time
()
start
=
time
.
time
()
newspapers
=
{
"elpais"
:
"https://elpais.com/espana/2023-11-07/una-maniobra-judicial-que-amenaza-con-dejar-a-puigdemont-fuera-de-la-ley-de-amnistia.html"
,
"okdiario"
:
"https://okdiario.com/espana/policias-indignados-marlaska-cataluna-aguanto-6-dias-ferraz-cargo-20-minutos-11867826"
,
"elmundo"
:
"https://www.elmundo.es/internacional/2023/11/07/654ab70e21efa00b138b45cc.html"
,
# "20minutos": "https://www.20minutos.es/noticia/5188036/0/cancelan-cuenta-youtube-iker-jimenez-viva-libertad/",
"elconfidencial"
:
"https://www.elconfidencial.com/espana/2023-11-07/puigdemont-fiscal-tsunami-terrorismo-23j_3769680/"
,
"marca"
:
"https://www.marca.com/futbol/real-madrid/2023/11/08/654a7e8e22601d772a8b458a.html"
,
"abc"
:
"https://www.abc.es/espana/madrid/detenidos-empleados-empresa-desokupa-antecedentes-coaccionar-armas-20231107040950-nt.html?ref=https
%3
A
%2
F
%2
Fwww.abc.es
%2
F"
,
"elespanol"
:
"https://www.elespanol.com/espana/20231108/crece-protesta-sede-psoe-disturbios-acaban-batalla-campal-heridos/808169177_0.html"
,
"ntvespana"
:
"https://ntvespana.com/26/10/2023/la-burbuja-de-josue-cardenas-entrevistas-a-pio-moa-y-ramon-peralta-por-josue-cardenas/"
,
# "theobjective": "https://theobjective.com/espana/politica/2023-11-08/costa-sanchez-consejo-europeo/",
"elperiodistadigital"
:
"https://www.periodistadigital.com/periodismo/periodismo-online/20231108/ardio-ferraz-margenes-democracia-han-sobrepasado-video-689404954226/"
,
"vozpopuli"
:
"https://www.vozpopuli.com/espana/pedro-sanchez-concentraciones-sedes-socialistas.html"
,
"eldebate"
:
"https://www.eldebate.com/espana/20231108/el-ico-de-calvino-concedio-86400-a-la-galeria-de-arte-del-hijo-mayor-de-pujol-con-la-que-blanqueaba-comisiones_152023.html"
,
"alertadigital"
:
"https://www.alertadigital.com/2023/11/07/de-la-primavera-arabe-al-otono-espanol/"
}
mp
.
set_start_method
(
"spawn"
)
# Fix for FastAPI process shutting down when all processes end. Seen here: https://github.com/tiangolo/fastapi/issues/1487
mp
.
set_start_method
(
"spawn"
)
# Fix for FastAPI process shutting down when all processes end. Seen here: https://github.com/tiangolo/fastapi/issues/1487
with
mp
.
Pool
(
processes
=
8
)
as
pool
:
with
mp
.
Pool
(
processes
=
8
)
as
pool
:
test_results
=
pool
.
map
(
test_scraper
,
newspapers
.
values
())
test_results
=
pool
.
map
(
test_scraper
,
newspapers
.
values
())
...
@@ -133,23 +134,6 @@ async def test_scrapers_deprecated():
...
@@ -133,23 +134,6 @@ async def test_scrapers_deprecated():
"""
"""
start
=
time
.
time
()
start
=
time
.
time
()
newspapers
=
{
"elpais"
:
"https://elpais.com/espana/2023-11-07/una-maniobra-judicial-que-amenaza-con-dejar-a-puigdemont-fuera-de-la-ley-de-amnistia.html"
,
"okdiario"
:
"https://okdiario.com/espana/policias-indignados-marlaska-cataluna-aguanto-6-dias-ferraz-cargo-20-minutos-11867826"
,
"elmundo"
:
"https://www.elmundo.es/internacional/2023/11/07/654ab70e21efa00b138b45cc.html"
,
# "20minutos": "https://www.20minutos.es/noticia/5188036/0/cancelan-cuenta-youtube-iker-jimenez-viva-libertad/",
"elconfidencial"
:
"https://www.elconfidencial.com/espana/2023-11-07/puigdemont-fiscal-tsunami-terrorismo-23j_3769680/"
,
"marca"
:
"https://www.marca.com/futbol/real-madrid/2023/11/08/654a7e8e22601d772a8b458a.html"
,
"abc"
:
"https://www.abc.es/espana/madrid/detenidos-empleados-empresa-desokupa-antecedentes-coaccionar-armas-20231107040950-nt.html?ref=https
%3
A
%2
F
%2
Fwww.abc.es
%2
F"
,
"elespanol"
:
"https://www.elespanol.com/espana/20231108/crece-protesta-sede-psoe-disturbios-acaban-batalla-campal-heridos/808169177_0.html"
,
"ntvespana"
:
"https://ntvespana.com/26/10/2023/la-burbuja-de-josue-cardenas-entrevistas-a-pio-moa-y-ramon-peralta-por-josue-cardenas/"
,
# "theobjective": "https://theobjective.com/espana/politica/2023-11-08/costa-sanchez-consejo-europeo/",
"elperiodistadigital"
:
"https://www.periodistadigital.com/periodismo/periodismo-online/20231108/ardio-ferraz-margenes-democracia-han-sobrepasado-video-689404954226/"
,
"vozpopuli"
:
"https://www.vozpopuli.com/espana/pedro-sanchez-concentraciones-sedes-socialistas.html"
,
"eldebate"
:
"https://www.eldebate.com/espana/20231108/el-ico-de-calvino-concedio-86400-a-la-galeria-de-arte-del-hijo-mayor-de-pujol-con-la-que-blanqueaba-comisiones_152023.html"
,
"alertadigital"
:
"https://www.alertadigital.com/2023/11/07/de-la-primavera-arabe-al-otono-espanol/"
}
test_result
=
{
k
:
False
for
k
,
_
in
newspapers
.
items
()}
test_result
=
{
k
:
False
for
k
,
_
in
newspapers
.
items
()}
for
newspaper
,
url
in
newspapers
.
items
():
for
newspaper
,
url
in
newspapers
.
items
():
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment