Commit db57bf4a authored by Mario Chirinos's avatar Mario Chirinos

pagination el comentario

parent 9cf59fb3
......@@ -16,22 +16,24 @@ class NoticiasSpider(scrapy.Spider):
self.month = getattr(self, "month", None)
self.day = getattr(self, "day", None)
baseURL = "https://elcomentario.ucol.mx/{0}/{1}/{2}/".format(self.year, self.month.zfill(2), self.day.zfill(2))
self.baseURL = "https://elcomentario.ucol.mx/{0}/{1}/{2}/".format(self.year, self.month.zfill(2), self.day.zfill(2))
yield scrapy.Request(url=baseURL, callback=self.parse)
yield scrapy.Request(url=self.baseURL, callback=self.parse)
#-----------------------------------------------------------------------
def parse(self, response):
    """Queue the articles linked from a listing page, then follow pagination.

    Yields one ``scrapy.Request`` per ``h2.thumb-title`` link, handled by
    ``self.parse_item``. When the page contains a ``li.the-next-page`` link,
    one further request for the next listing page is yielded, handled by
    this method again.

    Args:
        response: the downloaded listing page.
    """
    print("parse", response.url)

    for link in response.xpath('//h2[@class="thumb-title"]/a/@href').extract():
        yield scrapy.Request(url="https://elcomentario.ucol.mx"+link, callback=self.parse_item)

    next_page = response.xpath('//li[@class="the-next-page"]/a/@href').extract_first()
    # extract_first() gives None when there is no "next page" link (i.e. on
    # the last page). The check has to happen before any string handling,
    # otherwise None.find() raises AttributeError and the guard is useless.
    if next_page is None:
        return

    # Keep the href from its first "/" onwards and rebuild the URL under the
    # date-based listing URL stored in self.baseURL.
    next_page = next_page[next_page.find("/"):]
    next_page = self.baseURL+"page"+next_page
    print("next_page", next_page)
    yield scrapy.Request(url=next_page, callback=self.parse)
#-----------------------------------------------------------------------
def parse_item(self, response):
# print(response.url)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment