Commit f7f6bcc7 authored by Renán Sosa Guillen

date parser

parent 155c0d82
......@@ -4,10 +4,10 @@ from collections import OrderedDict
"""
Uso:
python parse_date_files.py <nombre_del_crawler>
python parse_date_files.py <ruta_del_crawler> <nombre_archivo>
Ej.
python parse_date_files.py descarga_hacia_atras/laJornadaBC2
python parse_date_files.py descarga_hacia_atras/laJornadaBC2 noticias.json
"""
def dictRowGenerator(line):
......@@ -46,13 +46,14 @@ def dictRowGenerator(line):
info = sys.argv[1]
news_file = sys.argv[2]
media = info[info.rfind("/") + 1:]
download_type = info[:info.rfind("/")]
this_file_path = os.path.dirname(os.path.realpath(__file__))
json_file_path = this_file_path + "/" + download_type + "/" + media
destination_path = this_file_path + "/" + media
json_file = json.loads(open(json_file_path + "/noticias.json").read())
json_file = json.loads(open(json_file_path + "/" + news_file).read())
date_set = set()
for news in json_file:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment