Commit b4d9eda3 authored by Mario Chirinos

Opinion La Jornada

parent 211f8b16
@@ -16,7 +16,7 @@ class NoticiasSpider(scrapy.Spider):
name = 'noticias'
allowed_domains = ['jornada.com.mx']
start_urls = ['https://jornada.com.mx/']
-section_list = ["politica", "mundo", "capital", "ciencias", "cultura", "deportes", "economia", "sociedad", "estados", "espectaculos"]
+section_list = ["politica", "mundo", "capital", "ciencias", "cultura", "deportes", "economia", "sociedad", "estados", "espectaculos", "opinion"]
#-----------------------------------------------------------------------
def start_requests(self):
year = getattr(self, "year", None)
@@ -57,8 +57,8 @@ class NoticiasSpider(scrapy.Spider):
topic = remove_tags(topic)
for p in response.css('div.text').css('p').extract():
-p = p.replace("<br>", "\n")
-text += remove_tags(p) + "\n"
+# p = p.replace("<br>", "\n")
+text += remove_tags(p) + "\n "
## News item info ##
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment