Commit d66f91ca authored by Renán Sosa Guillen

Ajustes por cambios en la página.

parent 2f7d1d20
...@@ -35,7 +35,7 @@ class QuotesSpider(scrapy.Spider): ...@@ -35,7 +35,7 @@ class QuotesSpider(scrapy.Spider):
def parse(self, response): def parse(self, response):
pagination = response.xpath('//div[@class="pagination"]/a/@href').extract() pagination = response.xpath('//div[@class="pages"]/a/@href').extract()
if ( len(pagination) > 0 ): if ( len(pagination) > 0 ):
pagination = pagination[-1].strip('/') pagination = pagination[-1].strip('/')
pages = int(pagination[pagination.rfind('/')+1:]) pages = int(pagination[pagination.rfind('/')+1:])
...@@ -49,7 +49,7 @@ class QuotesSpider(scrapy.Spider): ...@@ -49,7 +49,7 @@ class QuotesSpider(scrapy.Spider):
def parse_page(self, response): def parse_page(self, response):
for link in response.xpath('//*[@class="post_title"]/h3/a/@href').extract(): for link in response.xpath('//*[@class="post-title"]/h2/a/@href').extract():
yield scrapy.Request(url=link, callback=self.parse_item) yield scrapy.Request(url=link, callback=self.parse_item)
...@@ -57,8 +57,8 @@ class QuotesSpider(scrapy.Spider): ...@@ -57,8 +57,8 @@ class QuotesSpider(scrapy.Spider):
item = NoticiasItem() item = NoticiasItem()
text = '' text = ''
item['date'] = response.xpath('//meta[@property="article:published_time"]/@content').extract_first() item['date'] = response.xpath('//meta[@property="article:published_time"]/@content').extract_first()
item['title'] = response.css('h2.entry-title::text').extract_first() item['title'] = response.css('h1.entry-title::text').extract_first()
item['topic'] = response.xpath('//span[@class="meta-cat"]/a/text()').extract() item['topic'] = response.xpath('//ul[@class="post-categories"]/li/a/text()').extract()
for paragraph in response.xpath('//p[@style="text-align: justify;"]/text()').extract(): for paragraph in response.xpath('//p[@style="text-align: justify;"]/text()').extract():
text += remove_tags(paragraph) + '\n' text += remove_tags(paragraph) + '\n'
item['text'] = text item['text'] = text
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment