Commit 39b868fa authored by Mario Chirinos

ljsl url

parent f0b353e1
......@@ -37,8 +37,9 @@ class NoticiasSpider(scrapy.Spider):
item["title"]=response.xpath('//h1[@class="entry-title"]/text()').extract_first()
text=""
for p in response.xpath('//div[@class="the_content_wrapper "]/p').extract():
text += remove_tags(p) + "\n"
text += remove_tags(p) + "\n "
item["text"]=text
item['topic'] = ", ".join(response.xpath('//div[@class="cat-wrapper"]/ul/li/a/text()').extract())
item["url"] = response.url
print(self.allowed_domains, item["title"])
yield(item)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment