Commit 67a2095b authored by Mario Chirinos's avatar Mario Chirinos

bulk upload

parent 789e2d00
......@@ -53,20 +53,20 @@ class Command(BaseCommand):
publisher=publisher[0]
#----------------------------------------------------------------
# news = News.objects.all().filter(publisher=publisher.id).order_by("-date")
# minYear = 0
# lastDate = datetime.datetime(1950,1,1)
news = News.objects.all().filter(publisher=publisher.id).order_by("-date")
minYear = 0
lastDate = datetime.datetime(1950,1,1)
# if news.count()>0:
# minYear = news[0].date.year
# lastDate = news[0].date
if news.count()>0:
minYear = news[0].date.year
lastDate = news[0].date
# Year list from directory structure
yearList = sorted([ int(y) for y in os.listdir('.') if os.path.isdir(y) ])
print (yearList)
for y in sorted(yearList):
# if y >=minYear:
if y >=minYear:
os.chdir(str(y))
print (os.getcwd())
......@@ -75,59 +75,61 @@ class Command(BaseCommand):
for f in filesList:
fileDate = datetime.datetime.strptime(f[:f.find(".")], "%Y-%m-%d").date()
print(fileDate)
if fileDate >= lastDate.date():
#delete news form the most recent day in the database so it can upload the day again
News.objects.filter(date__gte=fileDate).delete()
with open(f) as data_file:
try:
print(f)
data = json.load(data_file)
objectsList = []
for d in data:
newsDate = dateutil.parser.parse(d['date'])
#check for repeted news
# if News.objects.all().filter(Q(publisher=publisher.id)&Q(title=d['title'])&Q(date__gte=newsDate)).count() == 0:
if d['title'] == None:
d['title'] = "Sin Titulo"
if len(d['title']) >= 512:
d['title'] = d['title'][:500]
print (p + " "+ str(newsDate) + ": " + d['title'])
news = News()
news.publisher = publisher
news.title = d['title']
news.text = d['text']
news.url = d['url']
news.date = newsDate
# print ("topic type:", type(d['topic']))
# print (d['topic'])
if "topic" not in d or d['topic'] == "" or d['topic'] == None or d['topic'] ==[]:
topicstr = ["Sin Tema"]
if type(d['topic'])==list:
topicstr = d['topic']
# for t in d['topic']:
## topic, created = Topic.objects.all().get_or_create(name=t)
# topic, created = Topic.objects.get_or_create(name=t)
# news.topic.add(topic)
else:
# topic, created = Topic.objects.get_or_create(name=d['topic'])
# news.topic.add(topic)
topicstr = [d['topic']]
news.topic_raw = json.dumps(topicstr)
# news.save()
objectsList.append(news)
News.objects.bulk_create(objectsList)
# if fileDate >= lastDate.date():
with open(f) as data_file:
try:
print(f)
data = json.load(data_file)
objectsList = []
for d in data:
newsDate = dateutil.parser.parse(d['date'])
#check for repeted news
# if News.objects.all().filter(Q(publisher=publisher.id)&Q(title=d['title'])&Q(date__gte=newsDate)).count() == 0:
if d['title'] == None:
d['title'] = "Sin Titulo"
if len(d['title']) >= 512:
d['title'] = d['title'][:500]
print (p + " "+ str(newsDate) + ": " + d['title'])
news = News()
news.publisher = publisher
news.title = d['title']
news.text = d['text']
news.url = d['url']
news.date = newsDate
# print ("topic type:", type(d['topic']))
# print (d['topic'])
if "topic" not in d or d['topic'] == "" or d['topic'] == None or d['topic'] ==[]:
topicstr = ["Sin Tema"]
if type(d['topic'])==list:
topicstr = d['topic']
# for t in d['topic']:
## topic, created = Topic.objects.all().get_or_create(name=t)
# topic, created = Topic.objects.get_or_create(name=t)
# news.topic.add(topic)
else:
# topic, created = Topic.objects.get_or_create(name=d['topic'])
# news.topic.add(topic)
topicstr = [d['topic']]
except ValueError as e:
print (e)
print ("Error: " + os.getcwd()+"/"+f)
print (type(d['topic']))
news.topic_raw = json.dumps(topicstr)
# news.save()
objectsList.append(news)
News.objects.bulk_create(objectsList)
except ValueError as e:
print (e)
print ("Error: " + os.getcwd()+"/"+f)
print (type(d['topic']))
data_file.close()
data_file.close()
os.chdir("..")
os.chdir("..")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment