Commit db57bf4a authored by Mario Chirinos

pagination el comentario

parent 9cf59fb3
...@@ -16,22 +16,24 @@ class NoticiasSpider(scrapy.Spider): ...@@ -16,22 +16,24 @@ class NoticiasSpider(scrapy.Spider):
self.month = getattr(self, "month", None) self.month = getattr(self, "month", None)
self.day = getattr(self, "day", None) self.day = getattr(self, "day", None)
baseURL = "https://elcomentario.ucol.mx/{0}/{1}/{2}/".format(self.year, self.month.zfill(2), self.day.zfill(2)) self.baseURL = "https://elcomentario.ucol.mx/{0}/{1}/{2}/".format(self.year, self.month.zfill(2), self.day.zfill(2))
yield scrapy.Request(url=baseURL, callback=self.parse) yield scrapy.Request(url=self.baseURL, callback=self.parse)
#----------------------------------------------------------------------- #-----------------------------------------------------------------------
def parse(self, response):
    """Yield a request per article on a listing page, then follow pagination.

    Every ``h2.thumb-title`` link on the page is requested with
    ``parse_item`` as callback. If the page has a ``li.the-next-page``
    link, the next listing page is requested with this method as callback.

    Args:
        response: The downloaded listing page.

    Yields:
        scrapy.Request: One request per article link, followed by at most
        one request for the next listing page.
    """
    print("parse", response.url)
    for link in response.xpath('//h2[@class="thumb-title"]/a/@href').extract():
        yield scrapy.Request(url="https://elcomentario.ucol.mx"+link, callback=self.parse_item)

    next_page = response.xpath('//li[@class="the-next-page"]/a/@href').extract_first()
    if next_page is None:
        # No "next page" link: this is the last listing page. The check must
        # come before any string handling, otherwise ``None.find`` raises
        # AttributeError and the crawl of this page ends with an error.
        return

    # Keep only the part of the href from its first "/" onward and append it
    # to the dated base URL built in start_requests.
    # NOTE(review): if the href contains no "/", find() returns -1 and only
    # the last character is kept -- confirm the site's href format.
    next_page = next_page[next_page.find("/"):]
    next_page = self.baseURL+"page"+next_page
    print("next_page", next_page)
    yield scrapy.Request(url=next_page, callback=self.parse)
#----------------------------------------------------------------------- #-----------------------------------------------------------------------
def parse_item(self, response): def parse_item(self, response):
# print(response.url) # print(response.url)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment