Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
crawlersNoticias
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
4
Issues
4
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
m3
crawlersNoticias
Commits
ad316343
Commit
ad316343
authored
Aug 04, 2017
by
Mario Chirinos Colunga
💬
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
script
parent
a79008f2
Changes
4
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
1 addition
and
42 deletions
+1
-42
crawl_all.py
crawler_script/crawl_all.py
+1
-1
crawl_all.sh
crawler_script/crawl_all.sh
+0
-0
crawler_data.json
crawler_script/crawler_data.json
+0
-26
2017-01-06.json
sitios_yucatan/alChile/2017-01-06.json
+0
-15
No files found.
crawler_script/crawl_all.py
View file @
ad316343
...
...
@@ -53,7 +53,7 @@ with open(sys.argv[1]) as data_file:
currentDate
=
datetime
.
datetime
.
strptime
(
str
(
y
)
+
"-01-01"
,
'
%
Y-
%
m-
%
d'
)
day
=
1
for
d
in
range
(
day
,
365
+
1
):
for
d
in
range
(
day
,
(
365
if
today
.
year
!=
y
else
today
.
timetuple
()
.
tm_yday
)
+
1
):
filename
=
currentDate
.
strftime
(
'
%
Y-
%
m-
%
d'
)
+
".json"
scrapycommand
=
"scrapy crawl noticias -t json -o "
+
filename
+
" -a year="
+
str
(
currentDate
.
year
)
+
" -a month="
+
str
(
currentDate
.
month
)
+
" -a day="
+
str
(
currentDate
.
day
)
mydir
=
os
.
getcwd
()
...
...
crawler_script/crawl_all.sh
100644 → 100755
View file @
ad316343
File mode changed from 100644 to 100755
crawler_script/crawler_data.json
deleted
100644 → 0
View file @
a79008f2
[
{
"nombre"
:
"alChile"
,
"crawler"
:
"sitios_yucatan/alChile"
},
{
"nombre"
:
"desdeElBalcon"
,
"crawler"
:
"sitios_yucatan/desdeElBalcon"
},
{
"nombre"
:
"diarioYucatan"
,
"crawler"
:
"sitios_yucatan/diarioYucatan"
},
{
"nombre"
:
"grilloPorteno"
,
"crawler"
:
"sitios_yucatan/grilloPorteno"
},
{
"nombre"
:
"laJornadaMaya"
,
"crawler"
:
"sitios_yucatan/alChile"
},
{
"nombre"
:
"laVerdadYuc"
,
"crawler"
:
"sitios_yucatan/laVerdadYuc"
},
{
"nombre"
:
"lectorMX"
,
"crawler"
:
"sitios_yucatan/lectorMX"
},
{
"nombre"
:
"miPuntoDeVista"
,
"crawler"
:
"sitios_yucatan/miPuntoDeVista"
},
{
"nombre"
:
"notirivas"
,
"crawler"
:
"sitios_yucatan/notirivas"
},
{
"nombre"
:
"notisureste"
,
"crawler"
:
"sitios_yucatan/notisureste"
},
{
"nombre"
:
"puntoMedio"
,
"crawler"
:
"sitios_yucatan/puntoMedio"
},
{
"nombre"
:
"sona893"
,
"crawler"
:
"sitios_yucatan/sona893"
},
{
"nombre"
:
"yucatanALaMano"
,
"crawler"
:
"sitios_yucatan/yucatanALaMano"
},
{
"nombre"
:
"yucatanAlMinuto"
,
"crawler"
:
"sitios_yucatan/yucatanAlMinuto"
},
{
"nombre"
:
"yucatanEnCorto"
,
"crawler"
:
"sitios_yucatan/yucatanEnCorto"
},
{
"nombre"
:
"diarioYaqui"
,
"crawler"
:
"otros_sitios/diarioYaqui"
},
{
"nombre"
:
"laJornada"
,
"crawler"
:
"otros_sitios/laJornada"
},
{
"nombre"
:
"laJornadaAgs"
,
"crawler"
:
"otros_sitios/laJornadaAgs"
},
{
"nombre"
:
"laJornadaBC"
,
"crawler"
:
"otros_sitios/laJornadaBC"
},
{
"nombre"
:
"laJornadaGro"
,
"crawler"
:
"otros_sitios/laJornadaGro"
},
{
"nombre"
:
"laJornadaOte"
,
"crawler"
:
"otros_sitios/laJornadaOte"
},
{
"nombre"
:
"laJornadaSanLuis"
,
"crawler"
:
"otros_sitios/laJornadaSanLuis"
},
{
"nombre"
:
"laJornadaVer"
,
"crawler"
:
"otros_sitios/laJornadaVer"
},
{
"nombre"
:
"laJornadaZac"
,
"crawler"
:
"otros_sitios/laJornadaZac"
}
]
\ No newline at end of file
sitios_yucatan/alChile/2017-01-06.json
deleted
100644 → 0
View file @
a79008f2
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment