Commit 3cd0913c authored by Renán Sosa Guillen

crawlers

parent 10c46676
...@@ -38,7 +38,9 @@ class QuotesSpider(scrapy.Spider): ...@@ -38,7 +38,9 @@ class QuotesSpider(scrapy.Spider):
item['date'] = response.xpath('//meta[@property="article:published_time"]/@content').extract_first() item['date'] = response.xpath('//meta[@property="article:published_time"]/@content').extract_first()
item['topic'] = response.xpath('//*[@class="breadcrumb"]/span/a/text()').extract()[-2] item['topic'] = response.xpath('//*[@class="breadcrumb"]/span/a/text()').extract()[-2]
item['title'] = response.xpath('//*[@class="pane-content"]/h1/text()').extract_first() item['title'] = response.xpath('//*[@class="pane-content"]/h1/text()').extract_first()
for p in response.xpath('//*[@class="pane-content"]/div/p').extract(): paragraphs = response.xpath('//*[@class="pane-content"]/div/p').extract()
paragraphs.extend(response.xpath('//*[@class="rtejustify"]').extract())
for p in paragraphs:
text += remove_tags(p) + '\n' text += remove_tags(p) + '\n'
item['text'] = text item['text'] = text
item['location'] = response.xpath('//*[@class="field field-name-field-lugar field-type-text field-label-hidden"]/text()').extract_first() item['location'] = response.xpath('//*[@class="field field-name-field-lugar field-type-text field-label-hidden"]/text()').extract_first()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment