Commit f1dfa7e9 authored by Renán Sosa Guillen

crawlers

parent 720f6059
...@@ -211,6 +211,16 @@ Se incluyen los siguientes medios: ...@@ -211,6 +211,16 @@ Se incluyen los siguientes medios:
scrapy crawl noticias -t json --nolog -o noticias.json -a year=2017 -a month=3 -a day=22 scrapy crawl noticias -t json --nolog -o noticias.json -a year=2017 -a month=3 -a day=22
``` ```
No se encontraron notas antes del 2011.11.28. No se encontraron notas antes del 2011.11.28.
* [Proceso](http://www.proceso.com.mx/)
Uso:
```bash
cd proceso
scrapy crawl noticias -t json --nolog -o noticias.json -a year=2017 -a month=3
```
No se encontraron notas antes de noviembre de 1976.
* [Punto Medio](https://www.puntomedio.mx/) * [Punto Medio](https://www.puntomedio.mx/)
......
[ [
{"nombre": "Al Chile", "crawler": "sitios_yucatan/alChile", "desde": "01-06-2014", "url": "http://alchile.com.mx/"}, {"nombre": "Al Chile", "crawler": "descarga_por_fecha/alChile", "desde": "01-06-2014", "url": "http://alchile.com.mx/"},
{"nombre": "Desde el Balcón", "crawler": "sitios_yucatan/desdeElBalcon", "desde": "11-01-2014", "url": "http://www.desdeelbalcon.com/"}, {"nombre": "Desde el Balcón", "crawler": "descarga_por_fecha/desdeElBalcon", "desde": "11-01-2014", "url": "http://www.desdeelbalcon.com/"},
{"nombre": "Diario de Yucatán", "crawler": "sitios_yucatan/diarioYucatan", "desde": "02-04-2012", "url": "http://yucatan.com.mx/"}, {"nombre": "Diario del Yaqui", "crawler": "descarga_por_fecha/diarioYaqui", "desde": "30-12-2016", "url": "http://diariodelyaqui.mx"},
{"nombre": "El Grillo", "crawler": "sitios_yucatan/grilloPorteno", "desde": "04-11-2013", "url": "http://grilloporteno.com/"}, {"nombre": "Diario de Yucatán", "crawler": "descarga_por_fecha/diarioYucatan", "desde": "02-04-2012", "url": "http://yucatan.com.mx/"},
{"nombre": "La Jornada Maya", "crawler": "sitios_yucatan/laJornadaMaya", "desde": "12-03-2015", "url": "https://www.lajornadamaya.mx/"}, {"nombre": "El Grillo", "crawler": "descarga_por_fecha/grilloPorteno", "desde": "04-11-2013", "url": "http://grilloporteno.com/"},
{"nombre": "La Verdad Yucatán", "crawler": "sitios_yucatan/laVerdadYuc", "desde": "01-12-2015", "url": "http://laverdadnoticias.com/"}, {"nombre": "La Jornada", "crawler": "descarga_por_fecha/laJornada", "desde": "01-02-2005", "url": "http://www.jornada.unam.mx"},
{"nombre": "Lector MX", "crawler": "sitios_yucatan/lectorMX", "desde": "23-10-2015", "url": "http://lectormx.com/"}, {"nombre": "La Jornada Aguascalientes", "crawler": "descarga_por_fecha/laJornadaAgs", "desde": "01-12-2008", "url": "http://www.lja.mx/"},
{"nombre": "Mi Punto de Vista", "crawler": "sitios_yucatan/miPuntoDeVista", "desde": "04-10-2008", "url": "http://www.mipuntodevista.com.mx/"}, {"nombre": "La Jornada Baja California", "crawler": "descarga_por_fecha/laJornadaBC", "desde": "09-02-2015", "url": "http://jornadabc.mx"},
{"nombre": "Notirivas", "crawler": "sitios_yucatan/notirivas", "desde": "22-11-2016", "url": "http://gruporivas.com.mx/notirivas/"}, {"nombre": "La Jornada Guerrero", "crawler": "descarga_por_fecha/laJornadaGro", "desde": "22-01-2007", "url": "http://www.lajornadaguerrero.com.mx"},
{"nombre": "Notisureste", "crawler": "sitios_yucatan/notisureste", "desde": "28-11-2011", "url": "http://www.notisureste.com/"}, {"nombre": "La Jornada Maya", "crawler": "descarga_por_fecha/laJornadaMaya", "desde": "12-03-2015", "url": "https://www.lajornadamaya.mx/"},
{"nombre": "Punto Medio", "crawler": "sitios_yucatan/puntoMedio", "desde": "02-08-2015", "url": "https://www.puntomedio.mx/"}, {"nombre": "La Jornada de Oriente", "crawler": "descarga_por_fecha/laJornadaOte", "desde": "01-06-2013", "url": "http://www.lajornadadeoriente.com.mx/"},
{"nombre": "Sona 89.3", "crawler": "sitios_yucatan/sona893", "desde": "09-04-2012", "url": "http://sona893.fm/"}, {"nombre": "La Jornada San Luis", "crawler": "descarga_por_fecha/laJornadaSanLuis", "desde": "08-10-2014", "url": "http://lajornadasanluis.com.mx"},
{"nombre": "Yucatán a la Mano", "crawler": "sitios_yucatan/yucatanALaMano", "desde": "11-05-2015", "url": "http://www.yucatanalamano.com/"}, {"nombre": "La Jornada Veracruz", "crawler": "descarga_por_fecha/laJornadaVer", "desde": "11-05-2009", "url": "http://www.jornadaveracruz.com.mx"},
{"nombre": "Yucatán al Minuto", "crawler": "sitios_yucatan/yucatanAlMinuto", "desde": "17-01-2017", "url": "http://www.yucatanalminuto.com/"}, {"nombre": "La Jornada Zacatecas", "crawler": "descarga_por_fecha/laJornadaZac", "desde": "10-06-2013", "url": "http://ljz.mx"},
{"nombre": "Yucatán en Corto", "crawler": "sitios_yucatan/yucatanEnCorto", "desde": "02-04-2011", "url": "http://florcastillo.mx/noticias/"}, {"nombre": "La Verdad Yucatán", "crawler": "descarga_por_fecha/laVerdadYuc", "desde": "01-12-2015", "url": "http://laverdadnoticias.com/"},
{"nombre": "Diario del Yaqui", "crawler": "otros_sitios/diarioYaqui", "desde": "30-12-2016", "url": "http://diariodelyaqui.mx"}, {"nombre": "Lector MX", "crawler": "descarga_por_fecha/lectorMX", "desde": "23-10-2015", "url": "http://lectormx.com/"},
{"nombre": "La Jornada", "crawler": "otros_sitios/laJornada", "desde": "01-02-2005", "url": "http://www.jornada.unam.mx"}, {"nombre": "Mi Punto de Vista", "crawler": "descarga_por_fecha/miPuntoDeVista", "desde": "04-10-2008", "url": "http://www.mipuntodevista.com.mx/"},
{"nombre": "La Jornada Aguascalientes", "crawler": "otros_sitios/laJornadaAgs", "desde": "01-12-2008", "url": "http://www.lja.mx/"}, {"nombre": "Notirivas", "crawler": "descarga_por_fecha/notirivas", "desde": "22-11-2016", "url": "http://gruporivas.com.mx/notirivas/"},
{"nombre": "La Jornada Baja California", "crawler": "otros_sitios/laJornadaBC", "desde": "09-02-2015", "url": "http://jornadabc.mx"}, {"nombre": "Notisureste", "crawler": "descarga_por_fecha/notisureste", "desde": "28-11-2011", "url": "http://www.notisureste.com/"},
{"nombre": "La Jornada Guerrero", "crawler": "otros_sitios/laJornadaGro", "desde": "22-01-2007", "url": "http://www.lajornadaguerrero.com.mx"}, {"nombre": "Proceso", "crawler": "descarga_por_mes/proceso", "desde": "11-1976", "url": "http://www.proceso.com.mx/"},
{"nombre": "La Jornada de Oriente", "crawler": "otros_sitios/laJornadaOte", "desde": "01-06-2013", "url": "http://www.lajornadadeoriente.com.mx/"}, {"nombre": "Punto Medio", "crawler": "descarga_por_fecha/puntoMedio", "desde": "02-08-2015", "url": "https://www.puntomedio.mx/"},
{"nombre": "La Jornada San Luis", "crawler": "otros_sitios/laJornadaSanLuis", "desde": "08-10-2014", "url": "http://lajornadasanluis.com.mx"}, {"nombre": "Sona 89.3", "crawler": "descarga_por_fecha/sona893", "desde": "09-04-2012", "url": "http://sona893.fm/"},
{"nombre": "La Jornada Veracruz", "crawler": "otros_sitios/laJornadaVer", "desde": "11-05-2009", "url": "http://www.jornadaveracruz.com.mx"}, {"nombre": "Yucatán a la Mano", "crawler": "descarga_por_fecha/yucatanALaMano", "desde": "11-05-2015", "url": "http://www.yucatanalamano.com/"},
{"nombre": "La Jornada Zacatecas", "crawler": "otros_sitios/laJornadaZac", "desde": "10-06-2013", "url": "http://ljz.mx"} {"nombre": "Yucatán al Minuto", "crawler": "descarga_por_fecha/yucatanAlMinuto", "desde": "17-01-2017", "url": "http://www.yucatanalminuto.com/"},
{"nombre": "Yucatán en Corto", "crawler": "descarga_por_fecha/yucatanEnCorto", "desde": "02-04-2011", "url": "http://florcastillo.mx/noticias/"}
] ]
\ No newline at end of file
...@@ -70,7 +70,3 @@ with open(sys.argv[1]) as data_file: ...@@ -70,7 +70,3 @@ with open(sys.argv[1]) as data_file:
os.chdir("..") os.chdir("..")
print today.year print today.year
# scrapy crawl noticias -t json -o $y-$m-$d.json -a year=$y -a month=$m -a day=$d # ejecucion del crawler correspondiente segun el sitio # scrapy crawl noticias -t json -o $y-$m-$d.json -a year=$y -a month=$m -a day=$d # ejecucion del crawler correspondiente segun el sitio
This source diff could not be displayed because it is too large. You can view the blob instead.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment