Commit 604d6176 authored by Renán Sosa Guillen

Se corrigió lectura de primera página en spider/noticias.py del diarioYucatan

parent 5d1ef2b3
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
No preview for this file type
No preview for this file type
No preview for this file type
import scrapy
#scrapy crawl noticias -t json --nolog -o noticias.json -a year=2016 month=12 day=24
#scrapy crawl noticias -t json --nolog -o noticias.json -a year=2016 -a month=12 -a day=24
import re
......@@ -35,6 +35,9 @@ class QuotesSpider(scrapy.Spider):
pages = response.css("div.pagination").css("a::attr(href)")[-1].extract()
pages = int(pages[pages.rfind('/')+1:])
for p in range(0,pages):
if ( p == 0 ):
yield scrapy.Request(url=response.url+"/page/"+str(p+1), callback=self.parse_page, dont_filter=True)
else:
yield scrapy.Request(url=response.url+"/page/"+str(p+1), callback=self.parse_page)
def parse_page(self, response):
......
No preview for this file type
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment