Commit 50a5da31 authored by Renán Sosa Guillen

detalles

parent 84228051
...@@ -97,6 +97,8 @@ class QuotesSpider(scrapy.Spider): ...@@ -97,6 +97,8 @@ class QuotesSpider(scrapy.Spider):
if title is not None: if title is not None:
title = title.replace('\n','') title = title.replace('\n','')
title = title.replace('\t','') title = title.replace('\t','')
title = title.lstrip()
title = title.rstrip()
item['title'] = title item['title'] = title
item['topic'] = response.xpath('//*[@class="itemCategory"]/a/text()').extract_first() item['topic'] = response.xpath('//*[@class="itemCategory"]/a/text()').extract_first()
......
...@@ -61,7 +61,10 @@ class QuotesSpider(scrapy.Spider): ...@@ -61,7 +61,10 @@ class QuotesSpider(scrapy.Spider):
item['date'] = d item['date'] = d
item['topic'] = response.xpath('//*[@class="entry-crumbs"]/span/a[@class="entry-crumb"]/text()').extract()[2] item['topic'] = response.xpath('//*[@class="entry-crumbs"]/span/a[@class="entry-crumb"]/text()').extract()[2]
item['title'] = response.xpath('//*[@class="td-post-header"]/header/h1/text()').extract_first() ti = response.xpath('//*[@class="td-post-header"]/header/h1/text()').extract_first()
if ti is None:
ti = response.xpath('//header[@class="td-post-title"]/h1/text()').extract_first()
item['title'] = ti
for p in response.xpath('//*[@class="td-post-content"]/p').extract(): for p in response.xpath('//*[@class="td-post-content"]/p').extract():
text += remove_tags(p) + '\n' text += remove_tags(p) + '\n'
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment