Commit a96d6232 authored by Renán Sosa Guillen

merge foraneos

parents 5f865616 1033155b
......@@ -22,6 +22,7 @@ def remove_tags(text):
return TAG_RE.sub('', text)
# Matches a hyphen, exactly eight digits, and another hyphen (e.g. "-20180131-").
# NOTE(review): presumably a date embedded in a URL or file name — confirm against its usage.
DAT_RE = re.compile(r'-\d{8}-')
# Matches a newline immediately followed by a non-breaking space (U+00A0).
# The spider searches the tag-stripped news body for this and keeps only the
# text before its last occurrence.
RE = re.compile(r'\n\xa0')
class ImportantData(scrapy.Item):
......@@ -147,6 +148,12 @@ class QuotesSpider(scrapy.Spider):
for p in response.css('div.news-body').css('p').extract():
text += remove_tags(p) + "\n"
if text == '':
t = remove_tags(response.xpath('//div[@class="news-body"]').extract_first())
res = RE.search(t)
if res:
text = t[:t.rfind(res.group(0))]
item['text'] = text.strip()
item['url'] = response.url
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment