"""Management command that dumps news matching a word list to a JSON file."""
import codecs
import datetime
import json
import operator
import os
import sys

from django.core.management.base import BaseCommand, CommandError
from django.core.paginator import Paginator
from django.db.models import Q

from catalog.models import User, News, Publisher, Topic, audioTime, Search

# NOTE(review): presumably required so that Guns_N_Roses can be found; the
# imports below are kept after this call, in the same relative position they
# had originally, so module resolution is unchanged.
sys.path.append("/home/mario/git/")
from Guns_N_Roses import M3GunsNRoses
import nltk
from nltk.stem import SnowballStemmer
from textblob.classifiers import NaiveBayesClassifier
import dill as pickle


def file2list(fname):
    """Return the non-empty lines of the text file *fname*.

    The newline is removed from every line; lines that are empty once the
    newline is gone are dropped. Any other whitespace is left untouched.

    Args:
        fname: Path of the file to read (opened with the default encoding).

    Returns:
        A list of strings, one per non-empty line, in file order.
    """
    with open(fname) as f:
        stripped = (line.strip('\n') for line in f)
        # Filter AFTER stripping: a raw line always contains at least its
        # newline, so testing it before stripping can never detect a blank.
        return [line for line in stripped if line]


class Command(BaseCommand):
    """Export the news that match any word of a word-list file as JSON."""

    help = 'Report database'

    # Ids of the publishers whose news are taken into account.
    PUBLISHER_IDS = [3, 46, 39, 43, 45, 41, 40, 6, 7, 8, 27, 9, 10, 12, 13,
                     14, 15, 17, 31, 21, 24]
    # Only news dated on or after this day are selected.
    START_DATE = datetime.date(2014, 1, 1)
    # File (relative to the working directory) that receives the JSON dump.
    OUTPUT_FILE = 'dataNews.json'

    def add_arguments(self, parser):
        """Register the positional ``words`` argument.

        ``nargs=1`` makes argparse store the value as a one-element list,
        which is why ``handle`` reads ``options['words'][0]``.
        """
        parser.add_argument(
            'words', nargs=1, type=str,
            help='Path to a text file with one search word per line.',
        )

    def handle(self, *args, **options):
        """Collect the matching news and write them to ``OUTPUT_FILE``.

        For every word in the word-list file, news from the configured
        publishers dated from ``START_DATE`` onwards are filtered with
        ``search_vector=<word>``; the union of all matches is serialised as a
        JSON array of ``{"text", "id", "date", "url"}`` objects.
        """
        words = file2list(options['words'][0])
        # Echo the loaded words (not the ``list`` builtin).
        self.stdout.write(str(words))

        publishers = Publisher.objects.filter(id__in=self.PUBLISHER_IDS)
        # The publisher/date restriction is identical for every word, so it
        # is built once and combined with the per-word condition below.
        base_query = Q(publisher__in=publishers) & Q(date__gte=self.START_DATE)

        ids = set()
        for word in words:
            # NOTE(review): ``search_vector`` looks like a full-text search
            # field; its exact lookup semantics depend on the News model.
            matches = News.objects.filter(base_query & Q(search_vector=word))
            # Only the primary keys are needed here, so avoid loading the
            # complete rows (including the article text).
            ids.update(matches.values_list('id', flat=True))

        # Evaluate the final queryset once; it is used both for the count and
        # for building the export.
        news = list(News.objects.filter(id__in=ids))
        self.stdout.write("end")
        self.stdout.write(str(len(news)))

        doclist = [
            {
                "text": n.text,
                "id": n.id,
                "date": n.date.strftime("%Y-%m-%d"),
                "url": n.url,
            }
            for n in news
        ]
        with open(self.OUTPUT_FILE, 'w', encoding='utf8') as outfile:
            # ensure_ascii=False keeps non-ASCII characters readable in the
            # UTF-8 output instead of escaping them.
            json.dump(doclist, outfile, ensure_ascii=False, indent=2)