Commit 80a29bca authored by Renán Sosa Guillen's avatar Renán Sosa Guillen

crawlers

parent a4a62474
......@@ -45,12 +45,25 @@ class QuotesSpider(scrapy.Spider):
def parse(self, response):
    """Entry point: parse the landing page, then schedule every remaining
    numbered listing page discovered from the pagination widget.

    Yields scrapy.Request objects only (items are produced downstream by
    parse_item via parse_page).
    """
    # Re-request the landing page for item extraction; dont_filter because
    # the scheduler has already seen this URL.
    yield scrapy.Request(url=response.url, callback=self.parse_page, dont_filter=True)
    # Prefer the explicit "last page" shortcut link; fall back to the last
    # anchor in the pagination bar when the shortcut is absent.
    lastPage = response.xpath('//div[@class="numbered-pagination"]/a[@class="pagi-last"]/@href').extract_first()
    if lastPage is None:
        # Guard the fallback: extract() is empty when the page has no
        # pagination at all — previously this raised IndexError.
        pages = response.xpath('//div[@class="numbered-pagination"]/a/@href').extract()
        lastPage = pages[-1] if pages else None
    if lastPage is not None and lastPage != '':
        # Pagination hrefs look like ".../page/<n>/"; trim slashes and
        # take the trailing number as the last page index.
        lastPage = lastPage.strip('/')
        lastPage = int(lastPage[lastPage.rfind('/')+1:])
        # Schedule pages 2..lastPage (page 1 is the landing page above).
        for page in range(1, lastPage):
            yield scrapy.Request(url=self.baseURL + "/page/" + str(page+1), callback=self.parse_page)
def parse_page(self, response):
    """Extract article links from one listing page and follow pagination.

    Yields one Request per article (handled by parse_item) and, when a
    "next" link is present, a Request for the following listing page.
    """
    for link in response.xpath('//div[@id="main"]/div/h2[@class="entry_title"]/a/@href').extract():
        yield scrapy.Request(url=link, callback=self.parse_item)
    # Follow the "next" pagination link, if present and non-empty.
    nextPage = response.xpath('//div[@class="numbered-pagination"]/a[@class="pagi-next"]/@href').extract_first()
    if nextPage is not None and nextPage != '':
        # Debug trace kept from the original; parenthesized so the code is
        # valid under both Python 2 and Python 3 (was a py2-only statement).
        print(nextPage)
        yield scrapy.Request(url=nextPage, callback=self.parse)
......
......@@ -34,11 +34,14 @@ class QuotesSpider(scrapy.Spider):
def parse(self, response):
    """Parse the landing page and schedule every remaining listing page.

    NOTE(review): this span was a raw diff hunk with old and new lines
    interleaved (e.g. both `int(lastPage)` on the text() value and the
    later strip('/')/@href variant), which is semantically broken as
    written. This body is the post-change version the diff encodes.
    """
    # Re-request the landing page for item extraction; dont_filter because
    # the scheduler has already seen this URL.
    yield scrapy.Request(url=response.url, callback=self.parse_page, dont_filter=True)
    # Prefer the explicit "last" link; fall back to the final pagination anchor.
    lastPage = response.xpath('//*[@class="page-nav td-pb-padding-side"]/a[@class="last"]/@href').extract_first()
    if lastPage is None:
        lastPage = response.xpath('//*[@class="page-nav td-pb-padding-side"]/a/@href').extract()[-1]
    if lastPage is not None and lastPage != '':
        # Hrefs look like ".../page/<n>/": trim slashes, take trailing number.
        lastPage = lastPage.strip('/')
        lastPage = int(lastPage[lastPage.rfind('/')+1:])
        # Schedule pages 2..lastPage (page 1 is the landing page above).
        for page in range(1, lastPage):
            yield scrapy.Request(url=self.baseURL + "/page/" + str(page+1), callback=self.parse_page)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment