pagination el comentario

db57bf4a · Mario Chirinos · 9cf59fb3 · db57bf4a
Commit db57bf4a authored May 23, 2023 by Mario Chirinos
Hide whitespace changes
Inline Side-by-side

Showing with 6 additions and 4 deletions

noticias.py spiders/daily/elComentario/elComentario/spiders/noticias.py +6 -4

No files found.
--- a/spiders/daily/elComentario/elComentario/spiders/noticias.py
+++ b/spiders/daily/elComentario/elComentario/spiders/noticias.py
@@ -16,22 +16,24 @@ class NoticiasSpider(scrapy.Spider):
 		self.month = getattr(self, "month", None)
 		self.day   = getattr(self, "day", None)

-		baseURL = "https://elcomentario.ucol.mx/{0}/{1}/{2}/".format(self.year, self.month.zfill(2), self.day.zfill(2))
+		self.baseURL = "https://elcomentario.ucol.mx/{0}/{1}/{2}/".format(self.year, self.month.zfill(2), self.day.zfill(2))

-		yield scrapy.Request(url=baseURL, callback=self.parse)
+		yield scrapy.Request(url=self.baseURL, callback=self.parse)
 		
 	#-----------------------------------------------------------------------
 	def parse(self, response):
-		print(response.url)
+		print("parse", response.url)
 		
 		for link in response.xpath('//h2[@class="thumb-title"]/a/@href').extract():
 			yield scrapy.Request(url="https://elcomentario.ucol.mx"+link, callback=self.parse_item)
 			
 		next_page = response.xpath('//li[@class="the-next-page"]/a/@href').extract_first()
+		next_page = next_page[next_page.find("/"):]
+		next_page = self.baseURL+"page"+next_page
 		print("next_page", next_page)
 		if next_page is not None:
 			yield scrapy.Request(url=next_page, callback=self.parse)
-			
+		
 	#-----------------------------------------------------------------------
 	def parse_item(self, response):
 #		print(response.url)