Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
m3_webInterface
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
m3
m3_webInterface
Commits
bcdd38a5
Commit
bcdd38a5
authored
Dec 14, 2024
by
Mario Chirinos
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
load news script
parent
b3463f5f
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
136 additions
and
2 deletions
+136
-2
loadNews.py
catalog/management/commands/loadNews.py
+134
-0
models.py
catalog/models.py
+2
-2
No files found.
catalog/management/commands/loadNews.py
0 → 100644
View file @
bcdd38a5
from
django.core.management.base
import
BaseCommand
,
CommandError
from
catalog.models
import
News
,
Publisher
,
Topic
,
audioTime
from
django.db.models
import
Q
import
os
import
glob
import
json
import
datetime
from
django.utils
import
timezone
import
dateutil.parser
import
itertools
from
datetime
import
date
# Module-level settings.
# NOTE(review): neither of these names is referenced by the Command class in
# this file -- confirm whether they are still needed here.
updateRadioStations = False
# Absolute path to a recordings directory; presumably carried over from an
# audio-loading script -- TODO confirm.
recordingsDir = "/home/geoint/M3_NFS/recordings/"
class Command(BaseCommand):
    """Load crawled news JSON files from a directory tree into the database.

    The directory layout walked by this command is::

        <basedir>/<publisher shortName>/settings.json
        <basedir>/<publisher shortName>/<year>/*.json

    Every sub-directory of ``basedir`` is treated as a publisher whose
    ``shortName`` is the directory name. A publisher that is not in the
    database yet is created from its ``settings.json`` (keys ``name``,
    ``crawler`` and ``url``). Each ``*.json`` file inside a numeric year
    directory must contain a list of records with the keys ``date``,
    ``title``, ``text``, ``url`` and, optionally, ``topic``; one ``News`` row
    is saved per record.

    All paths are built with ``os.path.join`` from the absolute ``basedir``,
    so the command works with relative paths and with or without a trailing
    slash, and it does not change the process working directory.
    """

    help = 'Update database'

    def add_arguments(self, parser):
        """Register the positional ``basedir`` argument (root of the tree)."""
        parser.add_argument('basedir', nargs=1, type=str)

    @staticmethod
    def _normalize_title(title):
        """Return a storable title: default for ``None``, cut when too long."""
        if title is None:
            title = "Sin Titulo"
        if len(title) >= 512:
            title = title[:500]
        return title

    @staticmethod
    def _normalize_topics(record):
        """Return the topics of *record* as a list, defaulting to "Sin Tema".

        A missing, ``None``, empty-string or empty-list ``topic`` yields the
        default; a list is returned as is; any other value is wrapped in a
        single-element list.
        """
        # .get() so records without a "topic" key do not raise KeyError.
        topic = record.get('topic')
        if topic is None or topic == "" or topic == []:
            return ["Sin Tema"]
        if isinstance(topic, list):
            return topic
        return [topic]

    def _get_or_create_publisher(self, shortName, publisherDir):
        """Return the Publisher for *shortName*, creating it when missing.

        Returns ``None`` when the publisher is not in the database and
        ``publisherDir`` has no ``settings.json`` to create it from.
        """
        # filter().first() yields None for a missing row; get() would raise
        # Publisher.DoesNotExist and the creation branch could never run.
        publisher = Publisher.objects.filter(shortName=shortName).first()
        if publisher is not None:
            return publisher

        settingsPath = os.path.join(publisherDir, "settings.json")
        if not os.path.isfile(settingsPath):
            print("settings.json not found")
            return None

        print(shortName, "do not exsist, crating publisher")
        with open(settingsPath, 'r') as f:
            cfgfile = json.load(f)

        publisher = Publisher()
        publisher.shortName = shortName
        publisher.name = cfgfile["name"]
        publisher.crawler = cfgfile["crawler"]
        publisher.url = cfgfile["url"]
        publisher.type = "texto"
        publisher.save()
        return publisher

    def _load_file(self, path, publisher, shortName):
        """Save one ``News`` row per record found in the JSON file *path*.

        A ``ValueError`` (malformed JSON or an unparsable date) is reported
        and stops processing of this file only; records saved before the
        error are kept.
        """
        print(os.path.basename(path))
        with open(path) as data_file:
            try:
                data = json.load(data_file)
                for d in data:
                    newsDate = dateutil.parser.parse(d['date'])
                    title = self._normalize_title(d['title'])
                    print(shortName + " " + str(newsDate) + ": " + title)

                    news = News()
                    news.publisher = publisher
                    news.title = title
                    news.text = d['text']
                    news.url = d['url']
                    news.date = newsDate
                    # Topics are stored as a JSON-encoded list of strings.
                    news.topic_raw = json.dumps(self._normalize_topics(d))
                    news.save()
            except ValueError as e:
                print(e)
                print("Error: " + path)

    def handle(self, *args, **options):
        """Walk ``basedir`` and load every publisher/year/file found in it."""
        # Load news
        basedir = os.path.abspath(options['basedir'][0])

        # Publisher list from directory structure
        publisherList = [
            i for i in os.listdir(basedir)
            if os.path.isdir(os.path.join(basedir, i))
        ]
        for p in publisherList:
            print(p)
            publisherDir = os.path.join(basedir, p)
            print(publisherDir)

            publisher = self._get_or_create_publisher(p, publisherDir)
            if publisher is None:
                # Without a publisher row the news cannot be attached to
                # anything, so this directory is skipped.
                continue

            # Year list from directory structure; non-numeric directories
            # are ignored instead of crashing int().
            yearDirs = sorted(
                (
                    y for y in os.listdir(publisherDir)
                    if y.isdigit()
                    and os.path.isdir(os.path.join(publisherDir, y))
                ),
                key=int,
            )
            print([int(y) for y in yearDirs])
            for y in yearDirs:
                yearDir = os.path.join(publisherDir, y)
                print(yearDir)
                filesList = sorted(glob.glob(os.path.join(yearDir, "*.json")))
                print([os.path.basename(f) for f in filesList])
                for f in filesList:
                    self._load_file(f, publisher, p)
catalog/models.py
View file @
bcdd38a5
...
...
@@ -69,7 +69,7 @@ class News(models.Model):
verbose_name
=
'Noticia'
verbose_name_plural
=
'Noticias'
ordering
=
(
'date'
,)
indexes
=
[
GinIndex
(
fields
=
[
'search_vector'
])
]
#
indexes = [ GinIndex(fields=['search_vector']) ]
#-------------------------------------------------------------------------------
class
Search
(
models
.
Model
):
user
=
models
.
ForeignKey
(
User
,
on_delete
=
models
.
CASCADE
)
...
...
@@ -86,7 +86,7 @@ class Profile(models.Model):
@
receiver
(
post_save
,
sender
=
User
)
def
create_user_profile
(
sender
,
instance
,
created
,
**
kwargs
):
if
created
:
Profile
.
objects
.
create
(
user
=
instance
)
#, subscriptions=Publisher.objects.all())
Profile
.
objects
.
create
(
user
=
instance
)
#, subscriptions=Publisher.objects.all())
print
(
"profile created"
)
@
receiver
(
post_save
,
sender
=
User
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment