Commit dbdb00c3 authored by Renán Sosa Guillen

crawlers

parent 7d116982
...@@ -43,9 +43,9 @@ class QuotesSpider(scrapy.Spider): ...@@ -43,9 +43,9 @@ class QuotesSpider(scrapy.Spider):
day = getattr(self, 'day', None) day = getattr(self, 'day', None)
self.date_parser = {'enero': 1, 'febrero': 2, 'marzo': 3, 'abril': 4, self.date_parser = {'enero': 1, 'febrero': 2, 'marzo': 3, 'abril': 4,
'mayo': 5, 'junio': 6, 'julio': 7, 'agosto': 8, 'mayo': 5, 'junio': 6, 'julio': 7, 'agosto': 8,
'septiembre': 9, 'octubre': 10, 'noviembre': 9, 'diciembre': 12} 'septiembre': 9, 'octubre': 10, 'noviembre': 11, 'diciembre': 12}
self.baseURL='http://www.unomasuno.com.mx/index.php/'+year+'/'+month+'/'+day self.baseURL='http://www.unomasuno.com.mx/'+year+'/'+month+'/'+day
yield scrapy.Request(url=self.baseURL, callback=self.parse) yield scrapy.Request(url=self.baseURL, callback=self.parse)
...@@ -91,7 +91,7 @@ class QuotesSpider(scrapy.Spider): ...@@ -91,7 +91,7 @@ class QuotesSpider(scrapy.Spider):
d = response.xpath('//p[@class="post-meta"]/span/text()').extract_first() d = response.xpath('//p[@class="post-meta"]/span/text()').extract_first()
d = d.replace(',','').split(' ') d = d.replace(',','').split(' ')
item['date'] = datetime(int(d[2]), self.date_parser[d[0].lower()], int(d[1]), tzinfo=self.tz).isoformat('T') item['date'] = datetime(int(d[2]), self.date_parser[d[1].lower()], int(d[0]), tzinfo=self.tz).isoformat('T')
item['topic'] = response.xpath('//span[@typeof="v:Breadcrumb"]/a/text()').extract()[1] item['topic'] = response.xpath('//span[@typeof="v:Breadcrumb"]/a/text()').extract()[1]
item['title'] = response.xpath('//*[@class="post-inner"]/h1/span/text()').extract_first() item['title'] = response.xpath('//*[@class="post-inner"]/h1/span/text()').extract_first()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment