arreglado cuestión de polémica

1d65b97e · Ulises Morales Ramírez · 9fded341 · 1d65b97e
Commit 1d65b97e authored Jan 21, 2025 by Ulises Morales Ramírez
Show whitespace changes
Inline Side-by-side

Showing with 14 additions and 34 deletions

noticias.py ...cuestionDePolemica/cuestionDePolemica/spiders/noticias.py +14 -34

No files found.
--- a/spiders/daily/cuestionDePolemica/cuestionDePolemica/spiders/noticias.py
+++ b/spiders/daily/cuestionDePolemica/cuestionDePolemica/spiders/noticias.py
@@ -42,13 +42,8 @@ class NoticiasSpider(scrapy.Spider):
        for post in data:
            try:
-                # Validar contenido
                content = post.get('content', {}).get('rendered', '').strip()
-                if not remove_tags(content):
+                if content:
-                    self.logger.warning(f"Skipped post {post.get('id')}: No meaningful content.")
-                    continue
-                # Obtener categoría del artículo
                    class_list = post.get('class_list', {})
                    topic = None
                    if isinstance(class_list, dict):
@@ -61,24 +56,9 @@ class NoticiasSpider(scrapy.Spider):
                    item['text'] = remove_tags(content)
                    item['topic'] = topic
                    item['url'] = post.get('link')
+                    print(item['title'])
-                # Enlace al autor
-                author_link = post.get('_links', {}).get('author', [{}])[0].get('href', None)
-                if author_link:
-                    yield scrapy.Request(url=author_link, callback=self.parse_author, meta={'item': item})
-                else:
-                    item['author'] = 'Unknown'
                    yield item
            except Exception as e:
                self.logger.error(f"Error processing post {post.get('id')}: {e}")
                continue
-    def parse_author(self, response):
-        """Procesa la información del autor de un artículo."""
-        try:
-            item = response.meta['item']  # Recupera el item pasado a través de meta
-            author_data = json.loads(response.text)
-            item['author'] = author_data.get('name', 'Unknown')
-            yield item  # Devuelve el item completo con el nombre del autor incluido
-        except Exception as e:
-            self.logger.error(f"Failed to parse author data: {e}")