Commit 67a2095b authored by Mario Chirinos's avatar Mario Chirinos

bulk upload

parent 789e2d00
......@@ -53,20 +53,20 @@ class Command(BaseCommand):
publisher=publisher[0]
#----------------------------------------------------------------
# news = News.objects.all().filter(publisher=publisher.id).order_by("-date")
# minYear = 0
# lastDate = datetime.datetime(1950,1,1)
news = News.objects.all().filter(publisher=publisher.id).order_by("-date")
minYear = 0
lastDate = datetime.datetime(1950,1,1)
# if news.count()>0:
# minYear = news[0].date.year
# lastDate = news[0].date
if news.count()>0:
minYear = news[0].date.year
lastDate = news[0].date
# Year list from directory structure
yearList = sorted([ int(y) for y in os.listdir('.') if os.path.isdir(y) ])
print (yearList)
for y in sorted(yearList):
# if y >=minYear:
if y >=minYear:
os.chdir(str(y))
print (os.getcwd())
......@@ -75,59 +75,61 @@ class Command(BaseCommand):
for f in filesList:
fileDate = datetime.datetime.strptime(f[:f.find(".")], "%Y-%m-%d").date()
print(fileDate)
if fileDate >= lastDate.date():
#delete news form the most recent day in the database so it can upload the day again
News.objects.filter(date__gte=fileDate).delete()
with open(f) as data_file:
try:
print(f)
data = json.load(data_file)
objectsList = []
for d in data:
newsDate = dateutil.parser.parse(d['date'])
#check for repeted news
# if News.objects.all().filter(Q(publisher=publisher.id)&Q(title=d['title'])&Q(date__gte=newsDate)).count() == 0:
if d['title'] == None:
d['title'] = "Sin Titulo"
if len(d['title']) >= 512:
d['title'] = d['title'][:500]
print (p + " "+ str(newsDate) + ": " + d['title'])
news = News()
news.publisher = publisher
news.title = d['title']
news.text = d['text']
news.url = d['url']
news.date = newsDate
# print ("topic type:", type(d['topic']))
# print (d['topic'])
if "topic" not in d or d['topic'] == "" or d['topic'] == None or d['topic'] ==[]:
topicstr = ["Sin Tema"]
if type(d['topic'])==list:
topicstr = d['topic']
# for t in d['topic']:
## topic, created = Topic.objects.all().get_or_create(name=t)
# topic, created = Topic.objects.get_or_create(name=t)
# news.topic.add(topic)
else:
# topic, created = Topic.objects.get_or_create(name=d['topic'])
# news.topic.add(topic)
topicstr = [d['topic']]
news.topic_raw = json.dumps(topicstr)
# news.save()
objectsList.append(news)
News.objects.bulk_create(objectsList)
# if fileDate >= lastDate.date():
with open(f) as data_file:
try:
print(f)
data = json.load(data_file)
objectsList = []
for d in data:
newsDate = dateutil.parser.parse(d['date'])
#check for repeted news
# if News.objects.all().filter(Q(publisher=publisher.id)&Q(title=d['title'])&Q(date__gte=newsDate)).count() == 0:
if d['title'] == None:
d['title'] = "Sin Titulo"
if len(d['title']) >= 512:
d['title'] = d['title'][:500]
print (p + " "+ str(newsDate) + ": " + d['title'])
news = News()
news.publisher = publisher
news.title = d['title']
news.text = d['text']
news.url = d['url']
news.date = newsDate
# print ("topic type:", type(d['topic']))
# print (d['topic'])
if "topic" not in d or d['topic'] == "" or d['topic'] == None or d['topic'] ==[]:
topicstr = ["Sin Tema"]
if type(d['topic'])==list:
topicstr = d['topic']
# for t in d['topic']:
## topic, created = Topic.objects.all().get_or_create(name=t)
# topic, created = Topic.objects.get_or_create(name=t)
# news.topic.add(topic)
else:
# topic, created = Topic.objects.get_or_create(name=d['topic'])
# news.topic.add(topic)
topicstr = [d['topic']]
except ValueError as e:
print (e)
print ("Error: " + os.getcwd()+"/"+f)
print (type(d['topic']))
news.topic_raw = json.dumps(topicstr)
# news.save()
objectsList.append(news)
News.objects.bulk_create(objectsList)
except ValueError as e:
print (e)
print ("Error: " + os.getcwd()+"/"+f)
print (type(d['topic']))
data_file.close()
data_file.close()
os.chdir("..")
os.chdir("..")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment