from django.core.management.base import BaseCommand, CommandError
from catalog.models import User, News, Publisher, Topic, audioTime, Search
from django.db.models import Q
from django.core.paginator import Paginator

import os
#import simplejson as json
import json

import sys
sys.path.append("/home/mario/git/")
from Guns_N_Roses import GunsAndRoses
import datetime
class Command(BaseCommand):
	"""Classify stored news with a saved model and dump the result to JSON.

	The command loads a classifier through ``GunsAndRoses.load_model``, walks
	the selected ``News`` rows page by page, asks
	``GunsAndRoses.predict_classes`` for a category per document and writes
	one ``{"category", "text", "id"}`` record per document to
	``OUTPUT_PATH``.  It stops early once every counted category has at
	least ``MIN_PER_CATEGORY`` documents.
	"""

	help = 'Report database'

	# Primary keys of the publishers whose news are classified.
	PUBLISHER_IDS = [3, 46, 39, 43, 45, 41, 40, 6, 7, 8, 27, 9, 10, 12, 13, 14, 15, 17, 31, 21, 24]
	# Only news dated on or after this day are considered.
	MIN_DATE = datetime.date(2014, 1, 1)
	# Number of news rows sent to the classifier per batch.
	PAGE_SIZE = 50
	# Categories that are always counted, even when no document gets them.
	TARGET_CATEGORIES = ("roses", "guns")
	# Per-category document count at which collection stops early.
	MIN_PER_CATEGORY = 5000
	# File the collected records are written to (relative to the CWD).
	OUTPUT_PATH = 'data.json'
	# Single-pass equivalent of the text clean-up: non-breaking spaces become
	# plain spaces; \x93, \x94 and both ASCII quote characters are removed.
	_TEXT_CLEANUP = str.maketrans("\xa0", " ", "\x93\x94\"'")

	def add_arguments(self, parser):
		"""Register the single positional ``model`` argument.

		``nargs=1`` makes ``options['model']`` a one-element list; its value
		is handed unchanged to ``GunsAndRoses.load_model``.
		"""
		parser.add_argument('model', nargs=1, type=str)

	def handle(self, *args, **options):
		"""Run the classification and write the JSON report.

		Prints the number of selected news rows, the number of pages and the
		final per-category counts to standard output.
		"""
		clf = GunsAndRoses.load_model(options['model'][0])
		publishers = Publisher.objects.filter(id__in=self.PUBLISHER_IDS)
		news_list = (
			News.objects
			.filter(publisher__in=publishers)
			.filter(date__gte=self.MIN_DATE)
			.order_by('id')
		)
		paginator = Paginator(news_list, self.PAGE_SIZE)

		print(paginator.count)
		print(paginator.num_pages)

		doclist = []
		categories = {name: 0 for name in self.TARGET_CATEGORIES}

		# page_range is 1-based and includes the last page; the former
		# range(1, num_pages) silently skipped the final page.
		for page_number in paginator.page_range:
			# Skip rows without text (empty string or NULL).
			docs_new = [d for d in paginator.page(page_number) if d.text]
			if not docs_new:
				# Nothing to classify on this page; the counts are unchanged.
				continue

			docs_text = [d.text.translate(self._TEXT_CLEANUP) for d in docs_new]
			predicted_classes = GunsAndRoses.predict_classes(docs_text, clf)

			# NOTE(review): assumes predict_classes yields one
			# (category, document) pair per input, in input order - confirm.
			for doc, (category, document) in zip(docs_new, predicted_classes):
				doclist.append({
					'category': category,
					'text': document,
					'id': doc.id,
				})
				# Count unexpected labels too instead of failing with KeyError
				# and losing everything collected so far.
				categories[category] = categories.get(category, 0) + 1

			if all(count >= self.MIN_PER_CATEGORY for count in categories.values()):
				break

		print(categories)
		with open(self.OUTPUT_PATH, 'w', encoding='utf8') as outfile:
			outfile.write(json.dumps(doclist, ensure_ascii=False, indent=2))