from django.core.management.base import BaseCommand, CommandError
from catalog.models import News, Publisher, Topic, audioTime
from django.db.models import Q
import os
import glob
import json
import datetime
from django.utils import timezone
import dateutil.parser
import itertools
from datetime import date


class Command(BaseCommand):
    """Load crawled news items from a directory tree into the database.

    The directory layout read by this command is
    ``<basedir>/<publisher shortName>/<year>/<YYYY-MM-DD>.json``. A publisher
    directory may also contain a ``settings.json`` (keys ``name``,
    ``crawler``, ``url``) which is used to create the Publisher row when it
    does not exist yet.

    Each JSON file is iterated as a sequence of items with the keys ``date``,
    ``title``, ``text``, ``url`` and, optionally, ``topic`` (a single value
    or a list of values).
    """

    help = 'Update database'

    # Manual switch for the radio-station recording-time refresh. It is
    # disabled by default (the step was previously guarded by ``if False:``).
    UPDATE_RECORDING_TIME = False

    def add_arguments(self, parser):
        """Register the single positional ``basedir`` argument."""
        parser.add_argument('basedir', nargs=1, type=str)

    def handle(self, *args, **options):
        """Import the news of every publisher directory found under basedir.

        Side effects: changes the process working directory while running and
        leaves it at ``basedir``; prints progress to stdout; writes
        Publisher, News and Topic rows.
        """
        if self.UPDATE_RECORDING_TIME:
            self._update_recording_time()

        # load news
        # Resolve to an absolute path so that a relative basedir keeps working
        # after the chdir below, and so that a missing trailing slash does not
        # break the publisher paths.
        basedir = os.path.abspath(options['basedir'][0])
        os.chdir(basedir)
        publisherList = [
            i for i in os.listdir(basedir)
            if os.path.isdir(os.path.join(basedir, i))
        ]
        for p in publisherList:
            print(p)
            publisherDir = os.path.join(basedir, p)
            os.chdir(publisherDir)
            print(os.getcwd())
            self._load_publisher(p, publisherDir)
        os.chdir(basedir)

    def _update_recording_time(self):
        """Rebuild the audioTime rows from the recording files on disk."""
        #update radio stations recotding time
        print("Recording Time:")
        recordingsDir = "/home/geoint/M3_NFS/recordings/"
        audioTime.objects.all().delete()
        publishers = Publisher.objects.all().filter(type="audio")
        for p in publishers:
            perDir = [
                names
                for _root, _dirs, names in os.walk(recordingsDir + p.shortName)
                if names
            ]
            files = sorted(itertools.chain.from_iterable(perDir))
            # The number of files found is stored as the 'minutes' value.
            minutes = len(files)
            # Keep only "<stem>.flac" names with a single dot; the stem is
            # parsed as an integer timestamp below.
            sortedFiles = sorted(
                f[:f.find(".flac")]
                for f in files
                if f.find(".flac") > 0 and f.count(".") == 1
            )
            print(p.shortName + ": " + str(minutes))
            if len(sortedFiles) > 2:
                print("timestamp: " + sortedFiles[0])
                print("len timestamp: " + str(len(sortedFiles[0])))
                since = datetime.datetime.utcfromtimestamp(int(sortedFiles[0]))
                print("startDate: ", since)
                print(sortedFiles[-1])
                endDate = datetime.datetime.utcfromtimestamp(int(sortedFiles[-1]))
                print("endDate: ", endDate)
                audioTime.objects.update_or_create(
                    publisher=p,
                    defaults={'minutes': minutes, "startDate": since, "endDate": endDate},
                )

    def _get_or_create_publisher(self, shortName, publisherDir):
        """Return the Publisher for shortName, creating it from settings.json.

        Returns None when the publisher is not in the database and the
        directory has no settings.json to create it from.
        """
        publisher = Publisher.objects.filter(shortName=shortName).first()
        if publisher is not None:
            return publisher

        settingsPath = os.path.join(publisherDir, "settings.json")
        if not os.path.isfile(settingsPath):
            return None

        print(shortName, "do not exsist, crating publisher")
        with open(settingsPath, 'r') as f:
            cfgfile = json.load(f)
        publisher = Publisher()
        publisher.shortName = shortName
        publisher.name = cfgfile["name"]
        publisher.crawler = cfgfile["crawler"]
        publisher.url = cfgfile["url"]
        publisher.type = "texto"
        publisher.save()
        return publisher

    def _load_publisher(self, shortName, publisherDir):
        """Import every not-yet-loaded JSON file of one publisher directory."""
        publisher = self._get_or_create_publisher(shortName, publisherDir)
        if publisher is None:
            # Indexing the empty queryset used to raise IndexError here.
            print(shortName, "has no Publisher row and no settings.json, skipping")
            return

        # The newest stored item decides where the import resumes: only years
        # and files at or after its date are read again.
        latest = News.objects.filter(publisher=publisher.id).order_by("-date").first()
        minYear = 0
        lastDate = datetime.datetime(1950, 1, 1)
        if latest is not None:
            minYear = latest.date.year
            lastDate = latest.date

        # Only decimal-named sub-directories are years; anything else would
        # make int() raise.
        yearList = sorted(
            int(y) for y in os.listdir(publisherDir)
            if y.isdecimal() and os.path.isdir(os.path.join(publisherDir, y))
        )
        print(yearList)
        for y in yearList:
            if y < minYear:
                continue
            os.chdir(os.path.join(publisherDir, str(y)))
            print(os.getcwd())
            filesList = sorted(glob.glob("*.json"))
            print(filesList)
            for f in filesList:
                try:
                    fileDate = datetime.datetime.strptime(
                        f[:f.find(".")], "%Y-%m-%d"
                    ).date()
                except ValueError:
                    print("Skipping file without a YYYY-MM-DD name: " + f)
                    continue
                if fileDate >= lastDate.date():
                    self._import_file(publisher, shortName, f)
            os.chdir(publisherDir)

    def _import_file(self, publisher, shortName, f):
        """Load one JSON file (relative to the cwd) and store its items.

        A ValueError raised while decoding the file or parsing an item's date
        is reported on stdout and abandons the rest of that file only.
        """
        d = None
        with open(f) as data_file:
            try:
                print(f)
                data = json.load(data_file)
                for d in data:
                    self._store_item(publisher, shortName, d)
            except ValueError as e:
                print(e)
                print("Error: " + os.getcwd() + "/" + f)
                # 'd' is still None when the file itself failed to decode, and
                # an item is not guaranteed to carry a 'topic' key.
                if isinstance(d, dict):
                    print(type(d.get('topic')))

    def _store_item(self, publisher, shortName, d):
        """Insert one news item and attach its topic(s) unless already stored."""
        newsDate = dateutil.parser.parse(d['date'])

        # NOTE(review): the duplicate check uses the raw title, while the
        # stored title may be replaced by "Sin Titulo" or truncated below, so
        # such items are not recognised as duplicates on a later run. Left
        # as-is; confirm the intended matching rule before changing it.
        alreadyStored = News.objects.filter(
            publisher=publisher.id, title=d['title'], date__gte=newsDate
        ).exists()
        if alreadyStored:
            return

        title = d['title']
        if title is None:
            title = "Sin Titulo"
        if len(title) >= 512:
            title = title[:500]
        print(shortName + " " + str(newsDate) + ": " + title)

        news = News()
        news.publisher = publisher
        news.title = title
        news.text = d['text']
        news.url = d['url']
        news.date = newsDate
        news.save()

        topics = d.get('topic', "")
        if topics is None or topics == "" or topics == []:
            topics = "Sin Tema"
        # isinstance, not a comparison of the type against the string "list"
        # (which is always False and sent whole lists down the scalar branch).
        if not isinstance(topics, list):
            topics = [topics]
        for t in topics:
            topic, _created = Topic.objects.get_or_create(name=t)
            news.topic.add(topic)