Commit 2e6e04a0 authored by Renán Sosa Guillen

crawlers

parent f4662908
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
......@@ -35,9 +35,10 @@ class QuotesSpider(scrapy.Spider):
self.year = getattr(self, "year", None)
self.month = getattr(self, "month", None)
self.day = getattr(self, "day", None)
self.date_parser = {'january': 1, 'february': 2, 'march': 3, 'april': 4,
'may': 5, 'june': 6, 'july': 7, 'august': 8,
'september': 9, 'october': 10, 'november': 11, 'december': 12}
self.date_parser = {"january": 1, "february": 2, "march": 3, "april": 4,
"may": 5, "june": 6, "july": 7, "august": 8,
"september": 9, "october": 10, "november": 11, "december": 12}
self.baseURL = "http://www.edomexaldia.com.mx/" + self.year + "/" + self.month.zfill(2) + "/" + self.day.zfill(2)
......@@ -80,6 +81,7 @@ class QuotesSpider(scrapy.Spider):
item['date'] = dat
item['title'] = remove_tags(response.xpath('//div[@id="main"]/div/h1').extract_first()).strip()
item['topic'] = None
author = response.xpath('//span[@class="post_author_author"]').extract_first()
if author is not None and author != '':
......
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment