Commit 3e1b2011 authored by Renán Sosa Guillen

crawl all

parent 4072187c
...@@ -24,8 +24,9 @@ with open(sys.argv[1]) as data_file: ...@@ -24,8 +24,9 @@ with open(sys.argv[1]) as data_file:
lstYears = os.listdir(".") lstYears = os.listdir(".")
lstYears.sort() lstYears.sort()
year = desde.year year = desde.year
if len(lstYears)>0: if len(lstYears) > 0:
year = int(lstYears[len(lstYears)-1]) year = int(lstYears[len(lstYears)-1])
for y in range(year, today.year+1): for y in range(year, today.year+1):
print y print y
try: try:
...@@ -44,16 +45,16 @@ with open(sys.argv[1]) as data_file: ...@@ -44,16 +45,16 @@ with open(sys.argv[1]) as data_file:
day = desde.timetuple().tm_yday day = desde.timetuple().tm_yday
print day print day
currentDate = desde currentDate = desde
if len(lstDays)>0: if len(lstDays) > 0:
strDate = lstDays[len(lstDays)-1] strDate = lstDays[len(lstDays)-1]
strDate = strDate[:strDate.find(".")] strDate = strDate[:strDate.find(".")]
currentDate = datetime.datetime.strptime(strDate, '%Y-%m-%d') currentDate = datetime.datetime.strptime(strDate, '%Y-%m-%d')
day = currentDate.timetuple().tm_yday day = currentDate.timetuple().tm_yday
elif y!=desde.year: elif y != desde.year:
currentDate = datetime.datetime.strptime(str(y)+"-01-01", '%Y-%m-%d') currentDate = datetime.datetime.strptime(str(y)+"-01-01", '%Y-%m-%d')
day = 1 day = 1
for d in range(day, (365 if today.year!=y else today.timetuple().tm_yday)+1): for d in range(day, ((datetime.date(y,12,31)-datetime.date(y,1,1)).days + 1 if today.year!=y else today.timetuple().tm_yday)+1):
filename = currentDate.strftime('%Y-%m-%d')+".json" filename = currentDate.strftime('%Y-%m-%d')+".json"
scrapycommand = "scrapy crawl noticias -t json -o " + filename + " -a year="+str(currentDate.year)+ " -a month="+str(currentDate.month)+" -a day="+str(currentDate.day) scrapycommand = "scrapy crawl noticias -t json -o " + filename + " -a year="+str(currentDate.year)+ " -a month="+str(currentDate.month)+" -a day="+str(currentDate.day)
mydir = os.getcwd() mydir = os.getcwd()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment