Commit f1dfa7e9 authored by Renán Sosa Guillen's avatar Renán Sosa Guillen

crawlers

parent 720f6059
......@@ -211,6 +211,16 @@ Se incluyen los siguientes medios:
scrapy crawl noticias -t json --nolog -o noticias.json -a year=2017 -a month=3 -a day=22
```
No se encontraron notas antes del 2011.11.28.
* [Proceso](http://www.proceso.com.mx/)
Uso:
```bash
cd proceso
scrapy crawl noticias -t json --nolog -o noticias.json -a year=2017 -a month=3
```
No se encontraron notas antes de Nov. 1976.
* [Punto Medio](https://www.puntomedio.mx/)
......
[
{"nombre": "Al Chile", "crawler": "sitios_yucatan/alChile", "desde": "01-06-2014", "url": "http://alchile.com.mx/"},
{"nombre": "Desde el Balcón", "crawler": "sitios_yucatan/desdeElBalcon", "desde": "11-01-2014", "url": "http://www.desdeelbalcon.com/"},
{"nombre": "Diario de Yucatán", "crawler": "sitios_yucatan/diarioYucatan", "desde": "02-04-2012", "url": "http://yucatan.com.mx/"},
{"nombre": "El Grillo", "crawler": "sitios_yucatan/grilloPorteno", "desde": "04-11-2013", "url": "http://grilloporteno.com/"},
{"nombre": "La Jornada Maya", "crawler": "sitios_yucatan/laJornadaMaya", "desde": "12-03-2015", "url": "https://www.lajornadamaya.mx/"},
{"nombre": "La Verdad Yucatán", "crawler": "sitios_yucatan/laVerdadYuc", "desde": "01-12-2015", "url": "http://laverdadnoticias.com/"},
{"nombre": "Lector MX", "crawler": "sitios_yucatan/lectorMX", "desde": "23-10-2015", "url": "http://lectormx.com/"},
{"nombre": "Mi Punto de Vista", "crawler": "sitios_yucatan/miPuntoDeVista", "desde": "04-10-2008", "url": "http://www.mipuntodevista.com.mx/"},
{"nombre": "Notirivas", "crawler": "sitios_yucatan/notirivas", "desde": "22-11-2016", "url": "http://gruporivas.com.mx/notirivas/"},
{"nombre": "Notisureste", "crawler": "sitios_yucatan/notisureste", "desde": "28-11-2011", "url": "http://www.notisureste.com/"},
{"nombre": "Punto Medio", "crawler": "sitios_yucatan/puntoMedio", "desde": "02-08-2015", "url": "https://www.puntomedio.mx/"},
{"nombre": "Sona 89.3", "crawler": "sitios_yucatan/sona893", "desde": "09-04-2012", "url": "http://sona893.fm/"},
{"nombre": "Yucatán a la Mano", "crawler": "sitios_yucatan/yucatanALaMano", "desde": "11-05-2015", "url": "http://www.yucatanalamano.com/"},
{"nombre": "Yucatán al Minuto", "crawler": "sitios_yucatan/yucatanAlMinuto", "desde": "17-01-2017", "url": "http://www.yucatanalminuto.com/"},
{"nombre": "Yucatán en Corto", "crawler": "sitios_yucatan/yucatanEnCorto", "desde": "02-04-2011", "url": "http://florcastillo.mx/noticias/"},
{"nombre": "Diario del Yaqui", "crawler": "otros_sitios/diarioYaqui", "desde": "30-12-2016", "url": "http://diariodelyaqui.mx"},
{"nombre": "La Jornada", "crawler": "otros_sitios/laJornada", "desde": "01-02-2005", "url": "http://www.jornada.unam.mx"},
{"nombre": "La Jornada Aguascalientes", "crawler": "otros_sitios/laJornadaAgs", "desde": "01-12-2008", "url": "http://www.lja.mx/"},
{"nombre": "La Jornada Baja California", "crawler": "otros_sitios/laJornadaBC", "desde": "09-02-2015", "url": "http://jornadabc.mx"},
{"nombre": "La Jornada Guerrero", "crawler": "otros_sitios/laJornadaGro", "desde": "22-01-2007", "url": "http://www.lajornadaguerrero.com.mx"},
{"nombre": "La Jornada de Oriente", "crawler": "otros_sitios/laJornadaOte", "desde": "01-06-2013", "url": "http://www.lajornadadeoriente.com.mx/"},
{"nombre": "La Jornada San Luis", "crawler": "otros_sitios/laJornadaSanLuis", "desde": "08-10-2014", "url": "http://lajornadasanluis.com.mx"},
{"nombre": "La Jornada Veracruz", "crawler": "otros_sitios/laJornadaVer", "desde": "11-05-2009", "url": "http://www.jornadaveracruz.com.mx"},
{"nombre": "La Jornada Zacatecas", "crawler": "otros_sitios/laJornadaZac", "desde": "10-06-2013", "url": "http://ljz.mx"}
{"nombre": "Al Chile", "crawler": "descarga_por_fecha/alChile", "desde": "01-06-2014", "url": "http://alchile.com.mx/"},
{"nombre": "Desde el Balcón", "crawler": "descarga_por_fecha/desdeElBalcon", "desde": "11-01-2014", "url": "http://www.desdeelbalcon.com/"},
{"nombre": "Diario del Yaqui", "crawler": "descarga_por_fecha/diarioYaqui", "desde": "30-12-2016", "url": "http://diariodelyaqui.mx"},
{"nombre": "Diario de Yucatán", "crawler": "descarga_por_fecha/diarioYucatan", "desde": "02-04-2012", "url": "http://yucatan.com.mx/"},
{"nombre": "El Grillo", "crawler": "descarga_por_fecha/grilloPorteno", "desde": "04-11-2013", "url": "http://grilloporteno.com/"},
{"nombre": "La Jornada", "crawler": "descarga_por_fecha/laJornada", "desde": "01-02-2005", "url": "http://www.jornada.unam.mx"},
{"nombre": "La Jornada Aguascalientes", "crawler": "descarga_por_fecha/laJornadaAgs", "desde": "01-12-2008", "url": "http://www.lja.mx/"},
{"nombre": "La Jornada Baja California", "crawler": "descarga_por_fecha/laJornadaBC", "desde": "09-02-2015", "url": "http://jornadabc.mx"},
{"nombre": "La Jornada Guerrero", "crawler": "descarga_por_fecha/laJornadaGro", "desde": "22-01-2007", "url": "http://www.lajornadaguerrero.com.mx"},
{"nombre": "La Jornada Maya", "crawler": "descarga_por_fecha/laJornadaMaya", "desde": "12-03-2015", "url": "https://www.lajornadamaya.mx/"},
{"nombre": "La Jornada de Oriente", "crawler": "descarga_por_fecha/laJornadaOte", "desde": "01-06-2013", "url": "http://www.lajornadadeoriente.com.mx/"},
{"nombre": "La Jornada San Luis", "crawler": "descarga_por_fecha/laJornadaSanLuis", "desde": "08-10-2014", "url": "http://lajornadasanluis.com.mx"},
{"nombre": "La Jornada Veracruz", "crawler": "descarga_por_fecha/laJornadaVer", "desde": "11-05-2009", "url": "http://www.jornadaveracruz.com.mx"},
{"nombre": "La Jornada Zacatecas", "crawler": "descarga_por_fecha/laJornadaZac", "desde": "10-06-2013", "url": "http://ljz.mx"},
{"nombre": "La Verdad Yucatán", "crawler": "descarga_por_fecha/laVerdadYuc", "desde": "01-12-2015", "url": "http://laverdadnoticias.com/"},
{"nombre": "Lector MX", "crawler": "descarga_por_fecha/lectorMX", "desde": "23-10-2015", "url": "http://lectormx.com/"},
{"nombre": "Mi Punto de Vista", "crawler": "descarga_por_fecha/miPuntoDeVista", "desde": "04-10-2008", "url": "http://www.mipuntodevista.com.mx/"},
{"nombre": "Notirivas", "crawler": "descarga_por_fecha/notirivas", "desde": "22-11-2016", "url": "http://gruporivas.com.mx/notirivas/"},
{"nombre": "Notisureste", "crawler": "descarga_por_fecha/notisureste", "desde": "28-11-2011", "url": "http://www.notisureste.com/"},
{"nombre": "Proceso", "crawler": "descarga_por_mes/proceso", "desde": "11-1976", "url": "http://www.proceso.com.mx/"},
{"nombre": "Punto Medio", "crawler": "descarga_por_fecha/puntoMedio", "desde": "02-08-2015", "url": "https://www.puntomedio.mx/"},
{"nombre": "Sona 89.3", "crawler": "descarga_por_fecha/sona893", "desde": "09-04-2012", "url": "http://sona893.fm/"},
{"nombre": "Yucatán a la Mano", "crawler": "descarga_por_fecha/yucatanALaMano", "desde": "11-05-2015", "url": "http://www.yucatanalamano.com/"},
{"nombre": "Yucatán al Minuto", "crawler": "descarga_por_fecha/yucatanAlMinuto", "desde": "17-01-2017", "url": "http://www.yucatanalminuto.com/"},
{"nombre": "Yucatán en Corto", "crawler": "descarga_por_fecha/yucatanEnCorto", "desde": "02-04-2011", "url": "http://florcastillo.mx/noticias/"}
]
\ No newline at end of file
......@@ -70,7 +70,3 @@ with open(sys.argv[1]) as data_file:
os.chdir("..")
print today.year
# scrapy crawl noticias -t json -o $y-$m-$d.json -a year=$y -a month=$m -a day=$d # ejecucion del crawler correspondiente segun el sitio
\ No newline at end of file
This source diff could not be displayed because it is too large. You can view the blob instead.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment