import datetime
import glob
import itertools
import json
import os
from datetime import date

import dateutil.parser
from django.core.management.base import BaseCommand, CommandError
from django.db.models import Q
from django.utils import timezone

from catalog.models import News, Publisher, Topic, audioTime


class Command(BaseCommand):
	"""Load crawled news stored as JSON files into the database.

	The command walks ``basedir``, which is read with this layout::

		basedir/<publisher shortName>/settings.json      (optional)
		basedir/<publisher shortName>/<year>/<YYYY-MM-DD>.json

	Every ``*.json`` file is iterated as a sequence of dicts with the keys
	``date``, ``title``, ``text``, ``url`` and, optionally, ``topic``.
	"""

	help = 'Update database'

	def add_arguments(self, parser):
		"""Register the single positional ``basedir`` argument."""
		parser.add_argument('basedir', nargs=1, type=str)

	def handle(self, *args, **options):
		"""Import the news of every publisher directory found under ``basedir``.

		``options['basedir']`` is a one-element list (``nargs=1``) holding the
		directory to scan. A publisher directory that has neither a
		``Publisher`` row nor a ``settings.json`` file is skipped.
		"""
		# The recording-time refresh is switched off in the source; the
		# guard is kept so it can be re-enabled by hand.
		if False:
			self._update_recording_time()

		# load news
		# Resolve to an absolute path before changing directory so that a
		# relative ``basedir`` keeps pointing at the same place.
		base_dir = os.path.abspath(options['basedir'][0])
		# Earlier versions of this command left the process inside
		# ``basedir``; that side effect is preserved. All paths below are
		# absolute and do not depend on the working directory.
		os.chdir(base_dir)

		publisher_names = [
			name for name in os.listdir(base_dir)
			if os.path.isdir(os.path.join(base_dir, name))
		]
		for short_name in publisher_names:
			print(short_name)
			publisher_dir = os.path.join(base_dir, short_name)
			print(publisher_dir)

			publisher = self._get_or_create_publisher(short_name, publisher_dir)
			if publisher is None:
				print(short_name, "has no publisher record and no settings.json, skipping")
				continue
			self._load_publisher_news(publisher, short_name, publisher_dir)

	def _update_recording_time(self):
		"""Rebuild the ``audioTime`` rows of every publisher of type "audio".

		All existing ``audioTime`` rows are deleted first. For each audio
		publisher the files under its recordings directory are counted and the
		first and last ``<stem>.flac`` names (in string sort order) are parsed
		as integer Unix timestamps for the start and end dates.
		"""
		# update radio stations recording time
		print("Recording Time:")
		recordingsDir = "/home/geoint/M3_NFS/recordings/"

		audioTime.objects.all().delete()
		for p in Publisher.objects.filter(type="audio"):
			per_dir = [files for r, d, files in os.walk(recordingsDir + p.shortName) if files]
			files = sorted(itertools.chain.from_iterable(per_dir))
			# NOTE(review): the file count is stored as "minutes"; presumably
			# each recording is one minute long - confirm.
			minutes = len(files)
			# Keep only names of the form "<stem>.flac" with a single dot.
			sortedFiles = sorted(
				f[:f.find(".flac")] for f in files
				if f.find(".flac") > 0 and f.count(".") == 1
			)
			print(p.shortName + ": " + str(minutes))
			if len(sortedFiles) > 2:
				print("timestamp: " + sortedFiles[0])
				print("len timestamp: " + str(len(sortedFiles[0])))

				since = datetime.datetime.utcfromtimestamp(int(sortedFiles[0]))
				print("startDate: ", since)
				print(sortedFiles[len(sortedFiles) - 1])
				endDate = datetime.datetime.utcfromtimestamp(int(sortedFiles[len(sortedFiles) - 1]))
				print("endDate: ", endDate)

				audioTime.objects.update_or_create(
					publisher=p,
					defaults={'minutes': minutes, "startDate": since, "endDate": endDate},
				)

	def _get_or_create_publisher(self, short_name, publisher_dir):
		"""Return the ``Publisher`` named ``short_name``, creating it if possible.

		A missing publisher is created from ``settings.json`` in
		``publisher_dir`` (keys ``name``, ``crawler``, ``url``). Returns
		``None`` when the publisher does not exist and no such file is found.
		"""
		publisher = Publisher.objects.filter(shortName=short_name).first()
		if publisher is not None:
			return publisher

		settings_path = os.path.join(publisher_dir, "settings.json")
		if not os.path.isfile(settings_path):
			return None

		print(short_name, "do not exsist, crating publisher")
		with open(settings_path, 'r') as f:
			cfgfile = json.load(f)
		publisher = Publisher()
		publisher.shortName = short_name
		publisher.name = cfgfile["name"]
		publisher.crawler = cfgfile["crawler"]
		publisher.url = cfgfile["url"]
		publisher.type = "texto"
		publisher.save()
		return publisher

	def _year_dirs(self, publisher_dir):
		"""Return sorted ``(year, directory name)`` pairs found in ``publisher_dir``.

		Sub-directories whose name is not an integer are ignored instead of
		aborting the whole import.
		"""
		years = []
		for name in os.listdir(publisher_dir):
			if not os.path.isdir(os.path.join(publisher_dir, name)):
				continue
			try:
				years.append((int(name), name))
			except ValueError:
				continue
		years.sort()
		return years

	def _load_publisher_news(self, publisher, short_name, publisher_dir):
		"""Import the year directories of one publisher.

		Years older than the publisher's most recent stored news are skipped,
		and so are files dated before that news item.
		"""
		latest = News.objects.filter(publisher=publisher.id).order_by("-date").first()
		min_year = 0
		last_date = datetime.date(1950, 1, 1)
		if latest is not None:
			min_year = latest.date.year
			last_date = latest.date.date()

		years = self._year_dirs(publisher_dir)
		print([year for year, _ in years])
		for year, dir_name in years:
			if year < min_year:
				continue
			year_dir = os.path.join(publisher_dir, dir_name)
			print(year_dir)
			# Escape the directory part so glob metacharacters in it are
			# taken literally.
			pattern = os.path.join(glob.escape(year_dir), "*.json")
			file_names = sorted(os.path.basename(path) for path in glob.glob(pattern))
			print(file_names)
			for file_name in file_names:
				self._load_news_file(publisher, short_name, os.path.join(year_dir, file_name), last_date)

	def _load_news_file(self, publisher, short_name, path, last_date):
		"""Import one ``<YYYY-MM-DD>.json`` file if it is dated ``last_date`` or later.

		A file whose name does not start with a ``%Y-%m-%d`` date is reported
		and skipped. A ``ValueError`` raised while reading or importing the
		file (invalid JSON, unparsable date) is reported and ends the import
		of that file only.
		"""
		file_name = os.path.basename(path)
		try:
			file_date = datetime.datetime.strptime(file_name[:file_name.find(".")], "%Y-%m-%d").date()
		except ValueError as e:
			print(e)
			print("Error: " + path)
			return

		if file_date < last_date:
			return

		try:
			print(file_name)
			with open(path) as data_file:
				data = json.load(data_file)
			for d in data:
				self._save_news_item(publisher, short_name, d)
		except ValueError as e:
			print(e)
			print("Error: " + path)

	def _save_news_item(self, publisher, short_name, d):
		"""Store one news dict unless an equivalent row already exists.

		A row counts as already stored when the same publisher has news with
		the same title and a date greater than or equal to the item's date.
		"""
		news_date = dateutil.parser.parse(d['date'])

		# Normalise the title BEFORE the duplicate check: the stored value is
		# the normalised one, so checking the raw title would never match and
		# the item would be inserted again on every run.
		title = d['title']
		if title is None:
			title = "Sin Titulo"
		if len(title) >= 512:
			title = title[:500]

		already_stored = News.objects.filter(
			Q(publisher=publisher.id) & Q(title=title) & Q(date__gte=news_date)
		).exists()
		if already_stored:
			return

		print(short_name + " " + str(news_date) + ": " + title)
		news = News()
		news.publisher = publisher
		news.title = title
		news.text = d['text']
		news.url = d['url']
		news.date = news_date
		news.save()

		topics = d.get('topic')
		if topics == "" or topics is None or topics == []:
			topics = "Sin Tema"
		# A topic may be a single value or a list of them; handle both the
		# same way.
		if not isinstance(topics, list):
			topics = [topics]
		for topic_name in topics:
			topic, _created = Topic.objects.get_or_create(name=topic_name)
			news.topic.add(topic)