"""Django management command: classify news articles and dump them to JSON.

Loads a saved GunsAndRoses model, runs it over the text of news items from a
fixed set of publishers, and writes the labelled documents to ``data.json``
in the current working directory.
"""
import datetime
import json
import os
import sys

from django.core.management.base import BaseCommand, CommandError
from django.core.paginator import Paginator
from django.db.models import Q

from catalog.models import User, News, Publisher, Topic, audioTime, Search

#import simplejson as json
# NOTE(review): machine-specific path; the import below depends on it.
sys.path.append("/home/mario/git/")
from Guns_N_Roses import GunsAndRoses

# Primary keys of the publishers whose news items are exported.
_PUBLISHER_IDS = [
    3, 46, 39, 43, 45, 41, 40, 6, 7, 8, 27,
    9, 10, 12, 13, 14, 15, 17, 31, 21, 24,
]
# Only news dated on or after this day is considered.
_EARLIEST_DATE = datetime.date(2014, 1, 1)
# Number of news items fetched and classified per batch.
_PAGE_SIZE = 50
# Labels that must each reach _MIN_DOCS_PER_CATEGORY before stopping early.
_TARGET_CATEGORIES = ("roses", "guns")
_MIN_DOCS_PER_CATEGORY = 5000
_OUTPUT_PATH = 'data.json'
# Single-pass equivalent of the former chain of str.replace() calls:
# non-breaking space -> plain space; \x93, \x94 and both quote characters
# are dropped.
_TEXT_CLEANUP = str.maketrans({
    "\xa0": " ",
    "\x93": None,
    "\x94": None,
    '"': None,
    "'": None,
})


class Command(BaseCommand):
    """Classify stored news items and export the labelled result as JSON."""

    help = 'Report database'

    def add_arguments(self, parser):
        """Register the positional ``model`` argument.

        ``nargs=1`` makes the parsed value a one-element list, which is why
        ``handle`` reads ``options['model'][0]``.
        """
        parser.add_argument('model', nargs=1, type=str)

    def handle(self, *args, **options):
        """Classify news page by page and write the results to ``data.json``.

        Prints the number of matching news items, the number of pages and the
        final per-category counts. Stops early once every target category has
        at least ``_MIN_DOCS_PER_CATEGORY`` documents.
        """
        clf = GunsAndRoses.load_model(options['model'][0])

        publishers = Publisher.objects.all().filter(id__in=_PUBLISHER_IDS)
        news = (
            News.objects.all()
            .filter(publisher__in=publishers)
            .filter(date__gte=_EARLIEST_DATE)
            .order_by('id')
        )
        paginator = Paginator(news, _PAGE_SIZE)
        # paginator.count is cached, so num_pages does not re-run the COUNT.
        print(paginator.count)
        print(paginator.num_pages)

        categories = {label: 0 for label in _TARGET_CATEGORIES}
        doclist = []

        # page_range is 1-based and includes the last page; the previous
        # range(1, num_pages) silently skipped the final page.
        for page_number in paginator.page_range:
            # Skip items without text (empty string or None).
            docs = [d for d in paginator.page(page_number) if d.text]
            if not docs:
                # Nothing to classify on this page; counts are unchanged.
                continue

            texts = [d.text.translate(_TEXT_CLEANUP) for d in docs]
            # predict_classes is consumed as (category, text) pairs,
            # presumably in input order -- the ids are matched by position.
            predictions = GunsAndRoses.predict_classes(texts, clf)

            for doc, (category, document) in zip(docs, predictions):
                doclist.append({
                    'category': category,
                    'text': document,
                    'id': doc.id,
                })
                # Tolerate labels outside the target set instead of raising
                # KeyError; they are counted but do not affect early stop.
                categories[category] = categories.get(category, 0) + 1

            if all(categories[label] >= _MIN_DOCS_PER_CATEGORY
                   for label in _TARGET_CATEGORIES):
                break

        print(categories)

        # Serialise before opening so a failure cannot truncate an old file.
        payload = json.dumps(doclist, ensure_ascii=False, indent=2)
        with open(_OUTPUT_PATH, 'w', encoding='utf8') as outfile:
            outfile.write(payload)