Commit 604d6176 authored by Renán Sosa Guillen

Se corrigió lectura de primera página en spider/noticias.py del diarioYucatan

parent 5d1ef2b3
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
import scrapy
#scrapy crawl noticias -t json --nolog -o noticias.json -a year=2016 month=12 day=24
#scrapy crawl noticias -t json --nolog -o noticias.json -a year=2016 -a month=12 -a day=24
import re
......@@ -35,7 +35,10 @@ class QuotesSpider(scrapy.Spider):
pages = response.css("div.pagination").css("a::attr(href)")[-1].extract()
pages = int(pages[pages.rfind('/')+1:])
for p in range(0,pages):
yield scrapy.Request(url=response.url+"/page/"+str(p+1), callback=self.parse_page)
if ( p == 0 ):
yield scrapy.Request(url=response.url+"/page/"+str(p+1), callback=self.parse_page, dont_filter=True)
else:
yield scrapy.Request(url=response.url+"/page/"+str(p+1), callback=self.parse_page)
def parse_page(self, response):
for link in response.css("div.bp-head").css("h2").css("a::attr(href)").extract():
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment