Commit 6e0a0add authored by Renán Sosa Guillen

crawl all

parent 38c3e463
...@@ -7,10 +7,8 @@ import datetime ...@@ -7,10 +7,8 @@ import datetime
today = datetime.datetime.now() today = datetime.datetime.now()
# baseDir = "/home/geoint/virtualHDD/m3/noticias/" baseDir = "/home/geoint/virtualHDD/m3/noticias/"
# scrapyDir = "/home/geoint/crawlersNoticias/" scrapyDir = "/home/geoint/crawlersNoticias/"
baseDir = "/home/cna_service/prueba/"
scrapyDir = "/home/cna_service/crawler/crawlersNoticias/"
with open(sys.argv[1]) as data_file: with open(sys.argv[1]) as data_file:
siteList = json.load(data_file) siteList = json.load(data_file)
os.chdir(baseDir) os.chdir(baseDir)
...@@ -60,7 +58,6 @@ with open(sys.argv[1]) as data_file: ...@@ -60,7 +58,6 @@ with open(sys.argv[1]) as data_file:
for d in range(day, ((datetime.date(y,12,31)-datetime.date(y,1,1)).days + 1 if today.year!=y else today.timetuple().tm_yday)+1): for d in range(day, ((datetime.date(y,12,31)-datetime.date(y,1,1)).days + 1 if today.year!=y else today.timetuple().tm_yday)+1):
filename = currentDate.strftime('%Y-%m-%d')+".json" filename = currentDate.strftime('%Y-%m-%d')+".json"
# scrapycommand = "scrapy crawl noticias -t json --nolog -o " + filename + " -a year="+str(currentDate.year)+ " -a month="+str(currentDate.month)+" -a day="+str(currentDate.day)
scrapycommand = "scrapy crawl noticias --nolog -s filename=" + filename + " -a year="+str(currentDate.year)+ " -a month="+str(currentDate.month)+" -a day="+str(currentDate.day) scrapycommand = "scrapy crawl noticias --nolog -s filename=" + filename + " -a year="+str(currentDate.year)+ " -a month="+str(currentDate.month)+" -a day="+str(currentDate.day)
mydir = os.getcwd() mydir = os.getcwd()
print mydir print mydir
...@@ -77,5 +74,5 @@ with open(sys.argv[1]) as data_file: ...@@ -77,5 +74,5 @@ with open(sys.argv[1]) as data_file:
os.chdir("..") os.chdir("..")
os.chdir("..") os.chdir("..")
print today.year # print today.year
# scrapy crawl noticias -t json -o $y-$m-$d.json -a year=$y -a month=$m -a day=$d # ejecucion del crawler correspondiente segun el sitio # scrapy crawl noticias -t json -o $y-$m-$d.json -a year=$y -a month=$m -a day=$d # ejecucion del crawler correspondiente segun el sitio
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment