animalPolitico

681da2fc · Ulises Morales Ramírez · 131e7b80 · 681da2fc
Commit 681da2fc authored Jan 15, 2025 by Ulises Morales Ramírez
Hide whitespace changes
Inline Side-by-side

Showing with 20 additions and 9 deletions

noticias.py ...s/daily/animalPolitico/animalPolitico/spiders/noticias.py +20 -9

No files found.
--- a/spiders/daily/animalPolitico/animalPolitico/spiders/noticias.py
+++ b/spiders/daily/animalPolitico/animalPolitico/spiders/noticias.py
 """
-	Spider for jornada.com.mx
-	Author: Mario Chirinos Coluga
+	Spider for animalpolitico.com
+	Author: Ulises Morales Ramirez
 	Usage: scrapy crawl noticias --nolog -O 2017-04-23.json -a year=2017 -a month=4 -a day=23

 """
@@ -106,10 +106,21 @@ class NoticiasSpider(scrapy.Spider):
        
        for edge in data.get("data", {}).get("allPostTypes", {}).get("edges", []):
            node = edge.get("node", {})
-            item['date']  = node.get("date")
-            item['title'] = remove_tags(node.get("title"))
-            item['topic'] = remove_tags(node.get("contentTypeName"))
-            item['text']  = remove_tags(node.get("contentRendered"))
-            item['url']   = "https://"+self.allowed_domains[0]+"/" + node.get("uri")
-            item['author'] = node.get("author", {}).get("node", {}).get("name")
-            yield item
+    
+            date = node.get("date")
+            title = node.get("title")
+            topic = node.get("contentTypeName")
+            text = node.get("contentRendered")
+            uri = node.get("uri")
+            author_name = node.get("author", {}).get("node", {}).get("name")
+            
+            if all([date, title, topic, text, uri, author_name]):  # Verificar que todos los campos existen
+                item = {
+                    'date': date,
+                    'title': remove_tags(title),
+                    'topic': remove_tags(topic),
+                    'text': remove_tags(text),
+                    'url': "https://"+self.allowed_domains[0]+ uri,
+                    'author': author_name
+                }
+                yield item