Commit 52d6be74 authored by Renán Sosa Guillen

crawlers

parent ad9b4024
......@@ -66,7 +66,10 @@ class QuotesSpider(scrapy.Spider):
ti = response.xpath('//header[@class="td-post-title"]/h1/text()').extract_first()
item['title'] = ti
for p in response.xpath('//*[@class="td-post-content"]/p').extract():
paragraphs = response.xpath('//*[@class="td-post-content"]/p').extract()
if len(paragraphs) <= 0:
paragraphs = response.xpath('//*[@dir="auto"]').extract()
for p in paragraphs:
text += remove_tags(p) + '\n'
item['text'] = text
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment