Commit 077afd63 authored by Renán Sosa Guillen

date classifier

parent 2175e603
...@@ -10,7 +10,7 @@ python parse_date_files.py laJornadaBC2 ...@@ -10,7 +10,7 @@ python parse_date_files.py laJornadaBC2
""" """
this_file_path = os.path.dirname(os.path.realpath(__file__)) this_file_path = os.path.dirname(os.path.realpath(__file__))
json_file_path = this_file_path+'/crawlersNoticias/descarga_hacia_atras/'+sys.argv[1] json_file_path = this_file_path+'/descarga_hacia_atras/'+sys.argv[1]
destination_path = this_file_path+'/'+sys.argv[1] destination_path = this_file_path+'/'+sys.argv[1]
json_file = json.loads(open(json_file_path+'/noticias.json').read()) json_file = json.loads(open(json_file_path+'/noticias.json').read())
...@@ -19,6 +19,8 @@ date_set = set() ...@@ -19,6 +19,8 @@ date_set = set()
for news in json_file: for news in json_file:
if news['date'] is not None: if news['date'] is not None:
news_date = news['date'][:news['date'].rfind('T')] news_date = news['date'][:news['date'].rfind('T')]
if len(news_date) > 10:
news_date = news['date'][:news['date'].rfind(' ')]
if not news_date in date_set: if not news_date in date_set:
date_set.add(news_date) date_set.add(news_date)
...@@ -36,6 +38,8 @@ for news in json_file: ...@@ -36,6 +38,8 @@ for news in json_file:
for line in json_file: for line in json_file:
if line['date'] is not None: if line['date'] is not None:
line_date = line['date'][:line['date'].rfind('T')] line_date = line['date'][:line['date'].rfind('T')]
if len(line_date) > 10:
line_date = line['date'][:line['date'].rfind(' ')]
if line_date == news_date: if line_date == news_date:
counter += 1 counter += 1
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment