new db

fed7af4d · Mario Chirinos Colunga · 13ce817c · fed7af4d · fed7af4d · fed7af4d
Commit fed7af4d authored Nov 20, 2018 by Mario Chirinos Colunga 💬
18 changed files
--- a/catalog/__pycache__/views.cpython-36.pyc
+++ b/catalog/__pycache__/views.cpython-36.pyc
--- a/catalog/management/commands/__pycache__/gunsNroses.cpython-36.pyc
+++ b/catalog/management/commands/__pycache__/gunsNroses.cpython-36.pyc
--- a/catalog/management/commands/__pycache__/gunsNroses2.cpython-36.pyc
+++ b/catalog/management/commands/__pycache__/gunsNroses2.cpython-36.pyc
--- a/catalog/management/commands/__pycache__/gunsNrosesGetNews.cpython-36.pyc
+++ b/catalog/management/commands/__pycache__/gunsNrosesGetNews.cpython-36.pyc
--- a/catalog/management/commands/__pycache__/updateDB.cpython-36.pyc
+++ b/catalog/management/commands/__pycache__/updateDB.cpython-36.pyc
--- a/catalog/management/commands/cfg.json
+++ b/catalog/management/commands/cfg.json
+{
+	"model":"/home/mario/git/Guns_N_Roses/utils/BayesClassifier_pos_1000_neg_1250_acc_0.92.pkl",
+	"documents":"/home/mario/git/Guns_N_Roses/utils/ids_vistos_primera_anot",
+	"stopwords":"/home/mario/git/Guns_N_Roses/utils/stopwords",
+	"stop": 500
+}
--- a/catalog/management/commands/gunsNroses.py
+++ b/catalog/management/commands/gunsNroses.py
@@ -28,6 +28,9 @@ class Command(BaseCommand):
 		categories = dict()
 		doclist = list()
+		print(newsList.count())
+		print(p.num_pages)
+		categories ={"roses":0, "guns":0}
 		for i in range (1,p.num_pages):
 #			print(i)
 			docs_new = [d for d in p.page(i) if d.text!=""]
@@ -41,13 +44,13 @@ class Command(BaseCommand):
 				item['text'] = document
 				item['id'] = docs_id[ii]
 				doclist.append(item)
-				if category not in categories:
+#				if category not in categories:
-					categories[category] = 0;
+#					categories[category] = 0;
 				categories[category] +=1
 				ii+=1
 			finish = True
 			for c in categories:
-				finish = finish and categories[c] >=1000
+				finish = finish and categories[c] >= 5000
 			if finish:
 				break
@@ -59,7 +62,7 @@ class Command(BaseCommand):
 		print(categories)
 		with open('data.json', 'w', encoding='utf8') as outfile:
 			s = json.dumps(doclist,  ensure_ascii=False, indent=2)
-			print (s)
+#			print (s)
 			outfile.write(s)

--- a/catalog/management/commands/gunsNroses2.py
+++ b/catalog/management/commands/gunsNroses2.py
+from django.core.management.base import BaseCommand, CommandError
+from catalog.models import User, News, Publisher, Topic, audioTime, Search
+from django.db.models import Q
+import operator
+from django.core.paginator import Paginator
+import os
+#import simplejson as json
+import json
+from django.db.models import Q
+import sys
+sys.path.append("/home/mario/git/")
+from Guns_N_Roses import M3GunsNRoses
+import datetime
+#from os import path
+import codecs
+import nltk
+from nltk.stem import SnowballStemmer
+from textblob.classifiers import (NaiveBayesClassifier)
+import dill as pickle
+class Command(BaseCommand):
+	"""Classify news with a stored model and dump the results to ``data.json``."""
+	help = 'Report database'
+	def add_arguments(self, parser):
+		# Single positional argument: path to the JSON configuration file.
+		parser.add_argument('cfg', nargs=1, type=str) 
+	def handle(self, *args, **options):
+		"""Classify paginated news until enough "guns" documents are collected.
+
+		The JSON file named by the ``cfg`` argument must provide the keys
+		``model``, ``stopwords``, ``documents`` and ``stop``. Every classified
+		document is written to ``data.json`` with its category, per-class
+		probabilities, cleaned text and id.
+		"""
+		with open(options['cfg'][0]) as f:
+			cfg = json.load(f)
+		print(cfg['model'])
+		clf = M3GunsNRoses.load_model(cfg["model"])
+		stopwords = M3GunsNRoses.file2list(cfg["stopwords"])
+		# Ids listed in the "documents" file are excluded from the query below.
+		documentsOut = M3GunsNRoses.file2list(cfg["documents"])
+		pub=[3, 46, 39, 43, 45, 41, 40, 6, 7, 8, 27, 9, 10, 12, 13, 14, 15, 17, 31, 21, 24]
+		publishers = Publisher.objects.filter(id__in=pub)
+		myQuery = Q(publisher__in=publishers) &  Q(date__gte=datetime.date(2014, 1, 1)) & ~Q(id__in=documentsOut)
+		newsList = News.objects.filter(myQuery).order_by('id')
+		p = Paginator(newsList, 50)
+		print("Classifing...")
+		doclist = list()
+		# Pre-seeded so the stop condition can read "guns" before any hit.
+		categories = {"guns":0, "roses":0}
+		# page_range runs from 1 to num_pages inclusive; the previous
+		# range(1, p.num_pages) never visited the last page.
+		for i in p.page_range:
+			# Skip documents without text (empty string or None).
+			docs_new = [d for d in p.page(i) if d.text]
+			for doc in docs_new:
+				text = doc.text.replace("\xa0", " ").replace("\x93", "").replace("\x94", "").replace('"', '').replace("'","")
+				dist_classes = M3GunsNRoses.NBanalyse(clf, text, stopwords)
+				# The category is the label with the highest probability.
+				category = max(dist_classes.items(), key=operator.itemgetter(1))[0]
+				item = dict()
+				item['category'] = category
+				item['probabilities'] = dist_classes
+				item['text'] = text
+				item['id'] = doc.id
+				print(dist_classes)
+				doclist.append(item)
+				# .get() keeps an unexpected label from raising KeyError.
+				categories[category] = categories.get(category, 0) + 1
+			# Checked once per page: stop when enough "guns" items were found.
+			if categories["guns"] >= cfg["stop"]:
+				break
+		with open('data.json', 'w', encoding='utf8') as outfile:
+			s = json.dumps(doclist,  ensure_ascii=False, indent=2)
+			print (s)
+			outfile.write(s)
--- a/catalog/management/commands/gunsNroses2.pyc
+++ b/catalog/management/commands/gunsNroses2.pyc
--- a/catalog/management/commands/gunsNrosesGetNews.py
+++ b/catalog/management/commands/gunsNrosesGetNews.py
+from django.core.management.base import BaseCommand, CommandError
+from catalog.models import User, News, Publisher, Topic, audioTime, Search
+from django.db.models import Q
+import operator
+from django.core.paginator import Paginator
+import os
+#import simplejson as json
+import json
+from django.db.models import Q
+import sys
+sys.path.append("/home/mario/git/")
+from Guns_N_Roses import M3GunsNRoses
+import datetime
+#from os import path
+import codecs
+import nltk
+from nltk.stem import SnowballStemmer
+from textblob.classifiers import (NaiveBayesClassifier)
+import dill as pickle
+#-------------------------------------------------------------------------------------------------
+def file2list(fname):
+	"""Return the non-blank lines of the text file ``fname`` without their newline.
+
+	The previous filter ``not l is ''`` compared identity against a string
+	literal and was applied before stripping, so it never removed anything and
+	blank lines were returned as empty strings.
+	"""
+	with open(fname) as f:
+		lines = [l.strip('\n') for l in f]
+	# Filter after stripping, by equality rather than identity.
+	return [l for l in lines if l != '']
+#-------------------------------------------------------------------------------------------------
+class Command(BaseCommand):
+	"""Export news that match any word of a word-list file to ``dataNews.json``."""
+	help = 'Report database'
+	def add_arguments(self, parser):
+		# Single positional argument: path to a text file read with file2list().
+		parser.add_argument('words', nargs=1, type=str) 
+	def handle(self, *args, **options):
+		"""Collect the news matching each listed word and write them as JSON.
+
+		Only news from the hard-coded publisher ids dated 2014-01-01 or later
+		are considered. Each exported item carries text, id, date and url.
+		"""
+		wordList = file2list(options['words'][0])
+		# Previously this printed the builtin ``list`` type, not the words read.
+		print(wordList)
+		pub=[3, 46, 39, 43, 45, 41, 40, 6, 7, 8, 27, 9, 10, 12, 13, 14, 15, 17, 31, 21, 24]
+		publishers = Publisher.objects.filter(id__in=pub)
+		# The publisher/date restriction is the same for every word: build it once.
+		baseQuery = Q(publisher__in=publishers) &  Q(date__gte=datetime.date(2014, 1, 1))
+		ids = set()
+		for w in wordList:
+			# One query per word; only the ids are needed to build the union.
+			ids.update(News.objects.filter(baseQuery & Q(search_vector=w)).values_list('id', flat=True))
+		newsList = News.objects.filter(Q(id__in=ids))
+		print("end")
+		print(newsList.count())
+		doclist = [{"text":n.text, "id":n.id, "date":n.date.strftime("%Y-%m-%d"), "url":n.url} for n in newsList]
+		with open('dataNews.json', 'w', encoding='utf8') as outfile:
+			s = json.dumps(doclist,  ensure_ascii=False, indent=2)
+			outfile.write(s)
--- a/catalog/management/commands/gunsNrosesGetNews.pyc
+++ b/catalog/management/commands/gunsNrosesGetNews.pyc
--- a/catalog/management/commands/report.py
+++ b/catalog/management/commands/report.py
@@ -8,10 +8,18 @@ class Command(BaseCommand):
 	help = 'Report database'
-#	def add_arguments(self, parser):
+	def add_arguments(self, parser):
-#		parser.add_argument('basedir', nargs=1, type=str) #(options['basedir'][0]
+		parser.add_argument('json', nargs=1, type=str) #(options['basedir'][0]
 	def handle(self, *args, **options):
+		data = dict()
+		data['users'] = User.objects.all().count()
+		data['publishers'] = Publisher.objects.all().count()
+		data['text'] = Publisher.objects.all().filter(type="texto").count()
+		data['audio'] = Publisher.objects.all().filter(type="audio").count()
+		data['documents'] = News.objects.all().count()
+		data['searches'] = Search.objects.all().count()
 		print ( "Users: " + str(User.objects.all().count()) )
 		print ( "Publishers: " + str(Publisher.objects.all().count()) )
 		print ( "\tText: " + str(Publisher.objects.all().filter(type="texto").count()) )
@@ -36,3 +44,6 @@ class Command(BaseCommand):
 		audioSources = Publisher.objects.all().filter(type="audio")
 		for a in audioSources:
 			print(a.name + ", " + a.url + ", " + str(audioTime.objects.all().filter(publisher=a)[0].minutes ))  
+		print(data)
+		with open(options['json'][0], 'w') as outfile:
+			json.dump(data, outfile)
--- a/catalog/management/commands/report.pyc
+++ b/catalog/management/commands/report.pyc
--- a/catalog/management/commands/updateDB.py
+++ b/catalog/management/commands/updateDB.py
@@ -18,7 +18,7 @@ class Command(BaseCommand):
 	def handle(self, *args, **options):
-		if True:
+		if False:
 			#update radio stations recotding time
 			print("Recording Time:")
 			recordingsDir = "/home/mario/virtualHDD/m3/recordings/"
@@ -50,14 +50,14 @@ class Command(BaseCommand):
 			if news.count()>0:
 				minYear = news[0].date.year
 				lastDate = news[0].date
-			yearList = [ int(y) for y in os.listdir('.')]
+			yearList = sorted([ int(y) for y in os.listdir('.')])
 			print (yearList)
-			for y in yearList:
+			for y in sorted(yearList):
 				if y >=minYear:
 					os.chdir(str(y))
 					print (os.getcwd())
-					filesList = os.listdir(".")
+					filesList = sorted(os.listdir("."))
 					for f in filesList:
 						fileDate = datetime.datetime.strptime(f[:f.find(".")], "%Y-%m-%d").date()

--- a/catalog/views.py
+++ b/catalog/views.py
@@ -145,7 +145,7 @@ def settingsView(request):
 	else:
 		form = ProfileForm( initial={'subscriptions':[ v for v in request.user.profile.subscriptions.all().values_list('id', flat=True)]})
 		news = News.objects.all()
-		publishersList = Publisher.objects.all().filter( Q(id__in = news.values('publisher').distinct()))
+		publishersList = Publisher.objects.filter( Q(id__in = news.values('publisher').distinct()))
 		choice =  [ (r.id,r.name) for r in publishersList ] 
 		form.fields['subscriptions'].choices=choice
@@ -184,7 +184,7 @@ def getNewsByRequest(request):
 	print (myQuery)
-	return News.objects.all().filter(myQuery)
+	return News.objects.filter(myQuery)
 #-------------------------------------------------------------------------------
 def index(request):
@@ -217,7 +217,7 @@ def index(request):
 	info['nPublishers'] = news.values('publisher').distinct().count()
 	info['nNews'] = news.count()
 	info['nNewsText'] = Quantity(news.count()).render(prec=3)  
-	info['nAudio'] = Publisher.objects.all().filter(type="audio").count()
+	info['nAudio'] = Publisher.objects.filter(type="audio").count()
 	queryset = news.values("publisher").order_by("publisher").annotate(count = Count('publisher') )
@@ -230,7 +230,7 @@ def index(request):
 	donutChart = [{"label": Publisher.objects.get(id=q['publisher']).name, "value":q["count"], "url":urlDict[q['publisher']]} for q in queryset]
-	publishers = Publisher.objects.all().filter( Q(id__in = news.values('publisher').distinct()))
+	publishers = Publisher.objects.filter( Q(id__in = news.values('publisher').distinct()))
 	queryset = news.values("publisher").annotate(day=TruncMonth('date') )
@@ -254,7 +254,7 @@ def publisherList(request, type="all"):
 	news = getNewsByRequest(request)
-	publishers = Publisher.objects.all().filter( Q(id__in = news.values('publisher').distinct()) & typeQuery)
+	publishers = Publisher.objects.filter( Q(id__in = news.values('publisher').distinct()) & typeQuery)
 	for p in publishers:
@@ -295,7 +295,7 @@ def wsAudioList(request):
 #-------------------------------------------------------------------------------
 def wsSearchList(request):
-	searches = Search.objects.all().filter(user=request.user)
+	searches = Search.objects.filter(user=request.user)
 	data = dict()
 	data['data']=[[s.text, "*" if s is None else s.startDate.strftime('%Y-%m-%d'), "*" if s is None else s.endDate.strftime('%Y-%m-%d'), ','.join([ sub.shortName for sub in s.publishers.all()])] for s in searches]
@@ -405,7 +405,7 @@ def wsDownloadNews(request):
 def audioList(request):
 	form = SearchForm(request.GET)
-	publishers = audioTime.objects.all().filter(minutes__gt=0 ) #type="audio")
+	publishers = audioTime.objects.filter(minutes__gt=0 ) #type="audio")
 	return render(request,'audioList.html',{"form":form, "publishers":publishers})
 #-------------------------------------------------------------------------------
@@ -413,7 +413,7 @@ def audioPublisher(request, publisher):
 	form = SearchForm(request.GET)
 	info=dict()
-	if Publisher.objects.all().filter(shortName=publisher).count() > 0:
+	if Publisher.objects.filter(shortName=publisher).count() > 0:
-		p =  Publisher.objects.all().filter(shortName=publisher)[0]
+		p =  Publisher.objects.filter(shortName=publisher)[0]
 	return render(request,'audioPlay.html',{"form":form, "publisher":p})
--- a/m3_webInterface/__pycache__/urls.cpython-36.pyc
+++ b/m3_webInterface/__pycache__/urls.cpython-36.pyc
--- a/m3_webInterface/urls.py
+++ b/m3_webInterface/urls.py
@@ -26,4 +26,5 @@ urlpatterns = [
 	url(r'^catalog/', include('catalog.urls')),
 	url(r'^$', RedirectView.as_view(url='/catalog/', permanent=True)),
 	url(r'^accounts/', include('django.contrib.auth.urls')),
+#    	url(r'^ws/reports/images/(?P<type>\w+)$', views.wsReportImages, name='wsReportImages')
 ] + static(settings.STATIC_URL, document_root=settings.STATIC_ROOT)
--- a/m3_webInterface/urls.pyc
+++ b/m3_webInterface/urls.pyc