Commit 604d6176 authored by Renán Sosa Guillen

Se corrigió lectura de primera página en spider/noticias.py del diarioYucatan

parent 5d1ef2b3
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
import scrapy
#scrapy crawl noticias -t json --nolog -o noticias.json -a year=2016 month=12 day=24
#scrapy crawl noticias -t json --nolog -o noticias.json -a year=2016 -a month=12 -a day=24
import re
......@@ -35,6 +35,9 @@ class QuotesSpider(scrapy.Spider):
pages = response.css("div.pagination").css("a::attr(href)")[-1].extract()
pages = int(pages[pages.rfind('/')+1:])
for p in range(0,pages):
if ( p == 0 ):
yield scrapy.Request(url=response.url+"/page/"+str(p+1), callback=self.parse_page, dont_filter=True)
else:
yield scrapy.Request(url=response.url+"/page/"+str(p+1), callback=self.parse_page)
def parse_page(self, response):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment