from __future__ import unicode_literals from django.shortcuts import render # Create your views here. from django.conf import settings from catalog.models import Publisher, News, Search, audioTime, Apikey from django.core.exceptions import ObjectDoesNotExist from django.shortcuts import redirect from django.http import HttpResponse from django.core.paginator import Paginator, EmptyPage, PageNotAnInteger from django.db.models import Count #from django.core.urlresolvers import reverse from django.urls import reverse from django.db.models.functions import TruncMonth, TruncYear from .forms import SearchForm, ProfileForm, SubscriptionsForm from django.db.models import Q import json import datetime import time import math from dateutil.relativedelta import relativedelta import dateutil.parser import urllib import json #import StringIO #python2 from io import StringIO #python3 from io import BytesIO #python3 import time import zipfile import csv from quantiphy import Quantity #from .audioList import getAudioList from numpy import array_equal import os import glob import shutil import wordcloud from stop_words import get_stop_words #from django.contrib.postgres.lookups import Unaccent AUDIOPATH = "/home/geoint/M3_NFS/recordings/" from django.contrib.postgres.search import SearchQuery, SearchRank, SearchVector, SearchHeadline from django.db.models import Value import html import string from django.contrib.auth import logout from django.http import JsonResponse from django.core.management import call_command from django.views.decorators.csrf import csrf_exempt import json #------------------------------------------------------------------------------- #------------------------------------------------------------------------------- def news2JSON(news): data = list() for n in news: item = dict() item['date'] = n.date.strftime('%Y-%m-%d') item['publisher'] = n.publisher.name item['title'] = n.title item['text'] = n.text item['url'] = n.url data.append(item) return data 
#-------------------------------------------------------------------------------
# Extra names used by the views below to answer 404 instead of crashing.
from django.http import Http404
from django.shortcuts import get_object_or_404

# Query-string values that the search code treats as "not provided".
_BLANK_PARAM_VALUES = ("", "None")

# (connect, read) timeout in seconds for the outbound processnews call.
_PROCESS_NEWS_TIMEOUT = (10, 600)


#-------------------------------------------------------------------------------
def _param_given(params, name, blank=_BLANK_PARAM_VALUES):
    """Return True when *name* is in *params* with a value not listed in *blank*."""
    return name in params and params[name] not in blank


#-------------------------------------------------------------------------------
def _format_day(value):
    """Format a date as YYYY-MM-DD, or return "*" when it is None."""
    return "*" if value is None else value.strftime('%Y-%m-%d')


#-------------------------------------------------------------------------------
def _datatable_ordering(request, columns):
    """Return the ``order_by`` argument selected by the DataTables request.

    Reads ``order[0][column]`` (an index into *columns*) and ``order[0][dir]``
    ("asc" or "desc") from ``request.GET``. Raises KeyError/IndexError/
    ValueError when those parameters are missing or invalid.
    """
    column = columns[int(request.GET["order[0][column]"])]
    prefix = {"asc": "", "desc": "-"}[request.GET["order[0][dir]"]]
    return prefix + column


#-------------------------------------------------------------------------------
def _datatable_page(queryset, request):
    """Return the slice of *queryset* selected by DataTables' start/length.

    A non-positive ``length`` (DataTables sends -1 for "show all") returns the
    whole queryset. A page past the end falls back to the last page.
    """
    length = int(request.GET['length'])
    start = int(request.GET['start'])
    if length <= 0:
        return queryset
    paginator = Paginator(queryset, length)
    try:
        # Floor division: "/" yields a float in Python 3, which the paginator
        # rejects whenever start is not an exact multiple of length.
        return paginator.page(start // length + 1)
    except EmptyPage:
        return paginator.page(paginator.num_pages)


#-------------------------------------------------------------------------------
def _audio_path(publisher, moment, filename=""):
    """Return the path below AUDIOPATH for *publisher* on the day of *moment*.

    The layout is ``AUDIOPATH/<publisher>/<YYYY>/<MM>/<DD>/<filename>``.
    Raises Http404 when the normalized path would fall outside AUDIOPATH
    (e.g. a publisher segment containing ".." or an absolute path).
    """
    root = os.path.normpath(AUDIOPATH)
    target = os.path.normpath(os.path.join(
        root,
        publisher,
        str(moment.year),
        str(moment.month).zfill(2),
        str(moment.day).zfill(2),
        filename,
    ))
    if os.path.commonpath([root, target]) != root:
        raise Http404("Unknown audio publisher")
    return target


#-------------------------------------------------------------------------------
def _news_zip_response(request, news):
    """Build the ZIP download response shared by the web and API exports.

    The archive always contains ``setting.txt`` (the request parameters as
    JSON) and, depending on ``request.GET['format']``, ``data.json`` ("JSON")
    or ``data.csv`` ("CSV", every field quoted, no header row).
    """
    buffer = BytesIO()
    with zipfile.ZipFile(buffer, "w") as archive:
        archive.writestr("setting.txt", json.dumps(request.GET))
        records = news2JSON(news)
        export_format = request.GET['format']
        if export_format == "JSON":
            archive.writestr("data.json", json.dumps(records))
        if export_format == "CSV":
            csv_buffer = StringIO()
            writer = csv.writer(csv_buffer, quoting=csv.QUOTE_ALL)
            for record in records:
                writer.writerow(list(record.values()))
            archive.writestr("data.csv", csv_buffer.getvalue())
    response = HttpResponse(buffer.getvalue(), content_type="application/x-zip-compressed")
    response['Content-Disposition'] = 'attachment; filename="news_' + str(int(time.time())) + '.zip"'
    return response


#-------------------------------------------------------------------------------
def _text_publisher_status(publisher, now):
    """Return the status-page entry for one text publisher.

    The date fields and the age in days ("status") are only present when the
    publisher has at least one news item.
    """
    publisher_news = News.objects.filter(publisher=publisher)
    entry = {
        "docs": publisher_news.count(),
        "origen": publisher.region + ", " + publisher.country,
        "url": publisher.url,
        "id": publisher.id,
    }
    if entry["docs"] > 0:
        entry["startDate"] = publisher_news.order_by('date')[0].date
        entry["endDate"] = publisher_news.order_by('-date')[0].date
        entry["status"] = (now - entry["endDate"]).days
    return entry


#-------------------------------------------------------------------------------
#-------------------------------------------------------------------------------
def logout_view(request):
    """Log the current user out and redirect.

    Redirects to ``settings.LOGOUT_REDIRECT_URL`` when it is set and to "/"
    otherwise; a Django view must return a response object.
    """
    logout(request)
    return redirect(getattr(settings, "LOGOUT_REDIRECT_URL", None) or "/")


#-------------------------------------------------------------------------------
def settingsView(request):
    """Show and update the publisher subscriptions of the current user.

    On POST a valid form replaces the user's subscriptions; on any other
    method the form is pre-filled with the current subscriptions.
    """
    profile = request.user.profile
    subscribed_ids = list(profile.subscriptions.all().values_list('id', flat=True))
    print("subS", subscribed_ids)
    print(request.POST)
    if request.method == "POST":
        form = ProfileForm(request.POST)
        if form.is_valid():
            profile.subscriptions.set(form.cleaned_data['subscriptions'])
    else:
        form = ProfileForm(initial={'subscriptions': subscribed_ids})

    # Only publishers that have at least one news item are offered.
    publishers_with_news = Publisher.objects.filter(
        Q(id__in=News.objects.all().values('publisher').distinct()))
    form.fields['subscriptions'].choices = [(p.id, p.name) for p in publishers_with_news]
    print(profile.subscriptions.all())
    return render(request, 'new/userProfile.html', {"form": form})


#-------------------------------------------------------------------------------
def settingsApiKey(request):
    """Render the searches template (no context)."""
    return render(request, 'new/searches.html', {})


#-------------------------------------------------------------------------------
def settingsSearches(request):
    """Render the searches template (no context)."""
    return render(request, 'new/searches.html', {})


#-------------------------------------------------------------------------------
def getNewsByRequest(request, values=None):
    """Build the News queryset described by the parameters in ``request.GET``.

    Side effect: ``request.GET`` is replaced by a mutable copy, and its
    ``text`` entry is HTML-unescaped when a text search is requested.

    Filters applied:
      * publishers: the ``publishers`` parameter, or the subscriptions of
        ``request.user`` when it is absent or blank.
      * startDate: lower bound; defaults to one year before today.
      * endDate: upper bound (the given day plus one); no default.
      * text: full-text ``websearch`` query against ``search_vector``.

    *values* is accepted for backward compatibility and is not used.

    Returns the queryset ordered by ``-date``, or by ``-rank`` when a text
    search is active.
    """
    print("getNewsByRequest\n\n\n")
    print("GET REQUEST", request.GET)
    request.GET = request.GET.copy()
    params = request.GET
    form = SearchForm(params)
    myQuery = ~Q()

    if _param_given(params, 'publishers', blank=("", "None", "[]")):
        myQuery &= Q(publisher_id__in=form['publishers'].value())
    else:
        myQuery &= Q(publisher_id__in=[r.id for r in request.user.profile.subscriptions.all()])

    if _param_given(params, 'startDate'):
        myQuery &= Q(date__gte=params['startDate'])
    else:
        one_year_ago = datetime.date.today() - relativedelta(years=1)
        myQuery &= Q(date__gte=one_year_ago.strftime("%Y-%m-%d"))

    if _param_given(params, 'endDate'):
        edate = datetime.datetime.strptime(params['endDate'], "%Y-%m-%d").date() + relativedelta(days=1)
        myQuery &= Q(date__lte=edate.strftime("%Y-%m-%d"))

    textSearchFlag = _param_given(params, 'text')
    if textSearchFlag:
        params["text"] = html.unescape(params["text"])
        myQuery &= Q(search_vector=SearchQuery(params['text'], search_type='websearch'))

    print(myQuery)
    news_result = News.objects.filter(myQuery).order_by("-date")
    if textSearchFlag:
        rank = SearchRank(
            SearchVector("text", config="spanish"),
            SearchQuery(params['text'], config="spanish", search_type='websearch'),
            normalization=Value(2).bitor(Value(4)),
        )
        news_result = news_result.annotate(rank=rank).order_by("-rank")
    return news_result


#-------------------------------------------------------------------------------
def status(request):
    """Render the status page with per-publisher document and audio statistics.

    Without a ``key`` parameter all publishers of type "texto" are listed;
    with one, the publishers subscribed by the owner of that API key are
    listed instead (404 when the key is unknown).
    """
    myQuery = ~Q()
    myQuery &= Q(type="texto")
    if "key" in request.GET:
        apiuser = get_object_or_404(Apikey, key=request.GET["key"]).user
        myQuery = ~Q()
        myQuery &= Q(id__in=[r.id for r in apiuser.profile.subscriptions.all()])

    now = datetime.datetime.now(datetime.timezone.utc)

    tdic = {
        p.name: _text_publisher_status(p, now)
        for p in Publisher.objects.filter(myQuery & Q(active=True))
    }
    # Inactive publishers use the same guarded helper, so one without news no
    # longer raises IndexError.
    tdic2 = {
        p.name: _text_publisher_status(p, now)
        for p in Publisher.objects.filter(myQuery & Q(active=False))
    }

    adic = {}
    for p in Publisher.objects.filter(type="audio"):
        entry = {}
        audio_rows = list(audioTime.objects.filter(publisher=p)[:1])
        if audio_rows:
            audio = audio_rows[0]
            entry["time"] = math.floor(audio.minutes / 60)
            entry["startDate"] = audio.startDate
            entry["endDate"] = audio.endDate
            entry["status"] = (now - entry["endDate"]).days
        # NOTE(review): publishers without audioTime rows are still listed
        # (with only "origen"), mirroring the text-publisher entries - confirm
        # this is the intended behavior.
        entry["origen"] = p.region + ", " + p.country
        adic[p.name] = entry

    return render(request, 'new/status.html',
                  {"textPublishers": tdic, "textPublishers2": tdic2, "audioPublishers": adic})


#-------------------------------------------------------------------------------
def index(request):
    """
    View function for home page of site.

    Runs the search described by ``request.GET``, saves the search when the
    form validates, and renders summary counters, a per-publisher donut chart
    and a per-month histogram of the matching news.
    """
    values = ["publisher", "date", "search_vector"]
    news = getNewsByRequest(request, values)
    form = SearchForm(request.GET.copy())
    if form.is_valid():
        print("DATA", form.data)
        form.save(request.user)
        form.save_m2m()
        print("form is valid")
    else:
        print("form is NOT valid", form.errors)

    subscriptions = [(r.id, r.name) for r in request.user.profile.subscriptions.all()]
    form.fields['publishers'].choices = subscriptions

    news_count = news.count()
    if news_count > 0:
        # Fill in the date range from the results only when the user left it
        # blank. The previous membership test against the form object was
        # always true, so user-supplied dates were overwritten.
        if not _param_given(form.data, 'startDate'):
            form.data['startDate'] = news.values("date").earliest('date')["date"].strftime("%Y-%m-%d")
        if not _param_given(form.data, 'endDate'):
            form.data['endDate'] = news.values("date").latest('date')["date"].strftime("%Y-%m-%d")

    info = dict()
    info['nPublishers'] = news.values('publisher').distinct().count()
    tp = 1
    info['publishers_percent'] = math.floor(100 * info['nPublishers'] / float(tp))
    info['nNews'] = news_count
    info['nNewsText'] = Quantity(news_count).render(prec=3)
    info['nAudio'] = Publisher.objects.filter(type="audio").count()
    info['nTopics'] = 0

    per_publisher = list(
        news.values("publisher").order_by("publisher").annotate(count=Count('publisher')))
    # One query for all publisher names instead of one query per chart slice.
    publisher_names = dict(
        Publisher.objects.filter(id__in=[row['publisher'] for row in per_publisher])
        .values_list('id', 'name'))
    donutChart = []
    for row in per_publisher:
        # Each slice links to the same search restricted to that publisher.
        slice_params = request.GET.copy()
        slice_params.update({"publishers": row['publisher']})
        donutChart.append({
            "label": publisher_names[row['publisher']],
            "value": row["count"],
            "url": urllib.parse.urlencode(slice_params),
        })

    dateCount = (news.values("publisher")
                 .annotate(day=TruncMonth('date'))
                 .values("day")
                 .order_by("day")
                 .annotate(count=Count("day")))
    histogramsData = [
        {"date": d["day"].strftime("%Y-%m-%d"), "data": {"total": d["count"]}}
        for d in dateCount
    ]
    histogramsLabels = ["total"]

    print(donutChart)
    return render(request, 'new/index.html',
                  {"form": form, "info": info, "donutChart": donutChart,
                   "histogram": histogramsData, "histogramsLabels": histogramsLabels})


#-------------------------------------------------------------------------------
def publisherList(request, type="all"):
    """List the publishers that have news matching the current search.

    *type* restricts the publishers to one publisher type; "all" applies no
    restriction. Each publisher is annotated with ``news`` (matching item
    count) and ``date`` (date of its earliest matching item, or "-").
    """
    form = SearchForm(request.GET)
    subscriptions = [(r.id, r.name) for r in request.user.profile.subscriptions.all()]
    form.fields['publishers'].choices = subscriptions

    typeQuery = ~Q()
    if type != "all":
        typeQuery = Q(type=type)

    news = getNewsByRequest(request)
    publishers = Publisher.objects.filter(
        Q(id__in=news.values('publisher').distinct()) & typeQuery)
    for p in publishers:
        newsTemp = news.filter(publisher=p).order_by("date")
        p.news = newsTemp.count()
        p.date = newsTemp[0].date.date() if p.news > 0 else "-"

    # The template appends its own publisher filter to this query string.
    myRequest = request.GET.copy()
    if 'publishers' in myRequest:
        del myRequest['publishers']
    return render(request, 'new/publisherList.html',
                  {"form": form, "publishers": publishers,
                   "queryurl": urllib.parse.urlencode(myRequest)})


#-------------------------------------------------------------------------------
def newsList(request, publisherShortName="all"):
    """Render the news list page; *publisherShortName* is currently unused."""
    form = SearchForm(request.GET)
    subscriptions = [(r.id, r.name) for r in request.user.profile.subscriptions.all()]
    form.fields['publishers'].choices = subscriptions
    return render(request, 'new/newsList.html', {"form": form})


#-------------------------------------------------------------------------------
def newsDetails(request, newsId):
    """Render one news item with its text split into ``<p>`` paragraphs.

    Responds with 404 when no news item has id *newsId*.
    """
    form = SearchForm(request.GET)
    news = get_object_or_404(News, id=newsId)
    print(request.GET)
    # NOTE: highlighting of the searched words was prototyped here and is
    # currently disabled.
    paragraphs = news.text.split(". \n")
    news.text = "".join("<p>" + p + "</p>" for p in paragraphs)
    return render(request, 'new/newsDetails.html', {"form": form, "news": news})


#-------------------------------------------------------------------------------
def wsAudioList(request, publisher, start, end):
    """Return, as JSON, the audio file names (without ".flac") in [start, end]."""
    filelist = [f[:-5] for f in listAudioFiles(publisher, int(start), int(end))]
    return HttpResponse(json.dumps(filelist), content_type="application/json")


#-------------------------------------------------------------------------------
def wsSearchList(request):
    """DataTables endpoint listing the saved searches of the current user.

    Reads the DataTables parameters ``order[0][column]``, ``order[0][dir]``,
    ``search[value]``, ``start`` and ``length`` from ``request.GET``.
    """
    ordering = _datatable_ordering(request, ['text', 'startDate', 'endDate', "publishers"])
    searches = Search.objects.filter(user=request.user).order_by(ordering)

    data = dict()
    # recordsTotal is the size before the free-text filter is applied.
    data['recordsTotal'] = searches.count()
    search = request.GET.get('search[value]', "")
    if search != "":
        searches = searches.filter(text__icontains=search)
    data['recordsFiltered'] = searches.count()

    # Only the requested page is serialized; a missing start/end date is
    # shown as "*".
    data['data'] = [
        [
            s.text,
            _format_day(s.startDate),
            _format_day(s.endDate),
            ','.join(sub.shortName for sub in s.publishers.all()),
        ]
        for s in _datatable_page(searches, request)
    ]
    return HttpResponse(json.dumps(data), content_type="application/json")


#-------------------------------------------------------------------------------
def wsNewsList2(request):
    """Return every news item matching the current search as a JSON list."""
    print("wsNewsList2")
    news = getNewsByRequest(request)
    data = [
        {
            "id": n.id,
            "publisher": n.publisher.name,
            "text": n.text,
            "date": n.date.strftime('%Y-%m-%d'),
        }
        for n in news
    ]
    return HttpResponse(json.dumps(data), content_type="application/json")


#-------------------------------------------------------------------------------
def wsNewsListTable(request):
    """DataTables endpoint listing the news matching the current search.

    Each row holds an HTML link to the details page, an HTML link to the
    original article labelled with the publisher short name, and the date.
    ``ajaxPublishers[]`` is accepted as an alias of ``publishers``.
    """
    print("wsNewsList")
    print("GET REQUEST", request.GET)
    ordering = _datatable_ordering(request, ['title', 'url', 'date'])

    request.GET = request.GET.copy()
    if "text" in request.GET:
        request.GET["text"] = html.unescape(request.GET["text"])
    print("GET REQUEST 2", request.GET)
    if 'ajaxPublishers[]' in request.GET:
        request.GET.setlist('publishers', request.GET.getlist('ajaxPublishers[]'))
        del request.GET['ajaxPublishers[]']

    news = getNewsByRequest(request).order_by(ordering)

    data = dict()
    # recordsTotal is the size before the free-text filter is applied.
    data['recordsTotal'] = news.count()
    search = request.GET.get('search[value]', "")
    if search != "":
        news = news.filter(text__icontains=search)
    data['recordsFiltered'] = news.count()

    # The search text comes from the client: URL-encode it for the query
    # string and HTML-escape everything that is interpolated into markup.
    text = request.GET.get("text")
    query = ("?" + urllib.parse.urlencode({"text": text})) if text is not None else ""
    data['data'] = list()
    for n in _datatable_page(news, request):
        details_url = reverse('news-details', args=(n.id,)) + query
        data['data'].append([
            '<a href="' + html.escape(details_url) + '">' + html.escape(n.title) + '</a>',
            '<a href="' + html.escape(n.url) + '" target="blank">'
            + html.escape(n.publisher.shortName) + '</a>',
            n.date.strftime("%Y-%m-%d"),
        ])
    return HttpResponse(json.dumps(data), content_type="application/json")


#-------------------------------------------------------------------------------
def wsGraphs(request):
    """Return graph data as JSON: 0 for type "histogram", ``{}`` otherwise."""
    data = dict()
    if request.GET['type'] == "histogram":
        data = 0
    return HttpResponse(json.dumps(data), content_type="application/json")


#-------------------------------------------------------------------------------
def wsDownloadNews(request):
    """Download the news matching the current search as a ZIP archive."""
    return _news_zip_response(request, getNewsByRequest(request))


#-------------------------------------------------------------------------------
def apiDownloadNews(request):
    """API-key variant of the news download.

    The request runs as the user owning ``request.GET['key']``. A missing or
    unknown key, or a key without a user, yields the JSON payload ``"{}"``
    instead of an unhandled exception.
    """
    try:
        apiuser = Apikey.objects.get(key=request.GET["key"]).user
    except (KeyError, ObjectDoesNotExist):
        apiuser = None
    if apiuser is None:
        return HttpResponse(json.dumps("{}"), content_type="application/json")
    request.user = apiuser
    return _news_zip_response(request, getNewsByRequest(request))


#-------------------------------------------------------------------------------
def wsWordCloud(request, newsId):
    """Return a 512x512 PNG word cloud of the text of news item *newsId*.

    Responds with 404 when the news item does not exist.
    """
    text = get_object_or_404(News, id=newsId).text
    wc = wordcloud.WordCloud(
        stopwords=set(get_stop_words('es')),
        background_color="Gainsboro",
        width=512,
        height=512,
        mode="RGBA",
        colormap="tab20",
    )
    wc.generate(text)
    image = wc.to_image()
    # The image is encoded as PNG, so it must be announced as PNG.
    response = HttpResponse(content_type="image/png")
    image.save(response, "PNG")
    return response


#-------------------------------------------------------------------------------
def wsSuggestions(request, newsId):
    """Return, as JSON, the news rows selected by the ``lev`` SQL filter.

    Responds with 404 when the reference news item does not exist.
    """
    document = get_object_or_404(News, id=newsId)
    data = list(
        News.objects.values().extra(where=['lev(text,%s)>=0.0'], params=[document.text]))
    # JsonResponse uses Django's JSON encoder, which can serialize the
    # date/time values present in the rows; plain json.dumps cannot.
    return JsonResponse(data, safe=False)


#-------------------------------------------------------------------------------
#-------------------------------------------------------------------------------
def listAudioFiles(publisher, start, end):
    """List the ``.flac`` files of *publisher* whose timestamp is in [start, end].

    Only the directory of the day containing *start* is inspected. File names
    are expected to be ``<unix timestamp>.flac``; other names are skipped.
    The process working directory is left untouched.

    Returns the matching file names sorted lexicographically; an empty list
    when the directory does not exist. Raises Http404 when *publisher* would
    resolve outside AUDIOPATH.
    """
    start = int(start)
    end = int(end)
    day_dir = _audio_path(publisher, datetime.datetime.fromtimestamp(start))
    pattern = os.path.join(glob.escape(day_dir), "*.flac")
    names = (os.path.basename(path) for path in glob.glob(pattern))
    return sorted(
        name for name in names
        if name[:-5].isdecimal() and start <= int(name[:-5]) <= end
    )


#-------------------------------------------------------------------------------
def audioList(request):
    """Render the list of audio sources that have recorded minutes."""
    form = SearchForm(request.GET)
    publishers = audioTime.objects.filter(minutes__gt=0)
    return render(request, 'new/audioList.html', {"form": form, "publishers": publishers})


#-------------------------------------------------------------------------------
def audioPublisher(request, publisher):
    """Render the audio player page for the publisher with this short name.

    Responds with 404 when no such publisher exists.
    """
    form = SearchForm(request.GET)
    p = Publisher.objects.filter(shortName=publisher).first()
    if p is None:
        raise Http404("Unknown publisher")
    return render(request, 'audioPlay.html', {"form": form, "publisher": p})


#-------------------------------------------------------------------------------
def fileStreaming(request, publisher, time):
    """Return the FLAC file of *publisher* recorded at unix timestamp *time*.

    Responds with 404 when the file does not exist or the path would fall
    outside AUDIOPATH.
    """
    moment = datetime.datetime.fromtimestamp(int(time))
    filepath = _audio_path(publisher, moment, time + ".flac")
    try:
        with open(filepath, 'rb') as fp:
            payload = fp.read()
    except FileNotFoundError:
        raise Http404("Audio file not found")
    return HttpResponse(payload, content_type="audio/flac")


#-------------------------------------------------------------------------------
def listStreaming(request, publisher, start, end):
    """Return, as JSON, the audio file names of *publisher* in [start, end]."""
    return HttpResponse(json.dumps(listAudioFiles(publisher, int(start), int(end))),
                        content_type="application/json")


#-------------------------------------------------------------------------------
def audioPlayerTest(request, publisher, start, end):
    """Render the test player with the audio files of *publisher* in [start, end]."""
    filelist = [f[:-5] for f in listAudioFiles(publisher, int(start), int(end))]
    return render(request, 'new/audioPlayerTest.html',
                  {"filelist": filelist, "publisher": publisher})


#-------------------------------------------------------------------------------
def read_last_execution_date(log_file='log.txt'):
    """Return the stripped content of *log_file*, or None when it is missing."""
    try:
        with open(log_file, 'r') as file:
            return file.read().strip()
    except FileNotFoundError:
        return None


def write_execution_date(log_file='log.txt'):
    """Overwrite *log_file* with today's date formatted as YYYY-MM-DD."""
    today = datetime.date.today()
    with open(log_file, 'w') as file:
        file.write(today.strftime('%Y-%m-%d'))


from django.http import JsonResponse, HttpResponseNotAllowed
import json
import datetime
import requests
from django.core.management import call_command
from django.views.decorators.csrf import csrf_exempt


# NOTE(review): this endpoint is CSRF-exempt, shows no authentication check
# here, and takes filesystem paths from the request body - confirm that access
# is restricted elsewhere (URL conf, proxy or middleware).
@csrf_exempt
def run_update_and_report(request):
    """Run the ``updateDB`` and ``report`` commands and notify the processor.

    Accepts POST only. The optional JSON body may contain ``db_path`` and
    ``output_path``. After both commands run, the date range from the last
    recorded execution to yesterday is posted to the processnews service and
    today's date is recorded.

    Returns a JSON status payload: 400 for an invalid body, 500 with the
    error message when any step fails, 405 for other HTTP methods.
    """
    if request.method != 'POST':
        return HttpResponseNotAllowed(['POST'])

    # Read the request parameters; an empty body means "use the defaults".
    try:
        data = json.loads(request.body) if request.body else {}
    except ValueError as exc:
        return JsonResponse({'status': 'error', 'message': f'Invalid JSON body: {exc}'},
                            status=400)
    if not isinstance(data, dict):
        return JsonResponse({'status': 'error', 'message': 'JSON body must be an object'},
                            status=400)

    try:
        db_path = data.get('db_path', '/data/m3/news/')
        json_output_path = data.get('output_path', 'catalog/static/js/data.js')

        # Run the management commands.
        call_command('updateDB', db_path)
        call_command('report', json_output_path)

        # Compute the date range to process.
        today = datetime.date.today()
        last_execution_date = read_last_execution_date()
        if not last_execution_date:
            last_execution_date = today.strftime('%Y-%m-%d')
        payload = {
            "fecha_inicio": last_execution_date,
            "fecha_final": (today - datetime.timedelta(days=1)).strftime('%Y-%m-%d'),
        }

        # Notify the processing service; the timeout keeps an unresponsive
        # server from blocking this worker indefinitely.
        response = requests.post("https://em.geoint.mx/m3/processnews", json=payload,
                                 timeout=_PROCESS_NEWS_TIMEOUT)
        response.raise_for_status()

        # Record the new execution date.
        write_execution_date()
        return JsonResponse({'status': 'success',
                             'message': f'Report generated at {json_output_path}',
                             'api_response': response.json()})
    except Exception as e:
        # Top-level boundary: report any failure to the caller as a 500.
        return JsonResponse({'status': 'error', 'message': str(e)}, status=500)