arreglado cuestion de polemica

parent 9fded341
...@@ -42,43 +42,23 @@ class NoticiasSpider(scrapy.Spider): ...@@ -42,43 +42,23 @@ class NoticiasSpider(scrapy.Spider):
for post in data: for post in data:
try: try:
# Validar contenido
content = post.get('content', {}).get('rendered', '').strip() content = post.get('content', {}).get('rendered', '').strip()
if not remove_tags(content): if content:
self.logger.warning(f"Skipped post {post.get('id')}: No meaningful content.") class_list = post.get('class_list', {})
continue topic = None
if isinstance(class_list, dict):
topic = class_list.get('7', '').split("category-")[1] if '7' in class_list else None
# Obtener categoría del artículo # Preparar item
class_list = post.get('class_list', {}) item = CuestiondepolemicaItem()
topic = None item['date'] = post.get('date')
if isinstance(class_list, dict): item['title'] = remove_tags(post.get('title', {}).get('rendered', ''))
topic = class_list.get('7', '').split("category-")[1] if '7' in class_list else None item['text'] = remove_tags(content)
item['topic'] = topic
# Preparar item item['url'] = post.get('link')
item = CuestiondepolemicaItem() print(item['title'])
item['date'] = post.get('date')
item['title'] = remove_tags(post.get('title', {}).get('rendered', ''))
item['text'] = remove_tags(content)
item['topic'] = topic
item['url'] = post.get('link')
# Enlace al autor
author_link = post.get('_links', {}).get('author', [{}])[0].get('href', None)
if author_link:
yield scrapy.Request(url=author_link, callback=self.parse_author, meta={'item': item})
else:
item['author'] = 'Unknown'
yield item yield item
except Exception as e: except Exception as e:
self.logger.error(f"Error processing post {post.get('id')}: {e}") self.logger.error(f"Error processing post {post.get('id')}: {e}")
continue continue
def parse_author(self, response):
    """Attach the author's name to a pending item and emit the item.

    The item is read from ``response.meta['item']`` and the response body
    is decoded as JSON; the ``name`` field of that JSON becomes
    ``item['author']``.

    If the body cannot be decoded, is not a JSON object, or carries no
    usable ``name``, the author is set to ``'Unknown'`` and the item is
    still yielded, so a bad author response does not discard the article.
    If ``meta`` holds no item, the error is logged and nothing is yielded.
    """
    try:
        item = response.meta['item']  # item handed over through request meta
    except KeyError as e:
        # Without the item there is nothing to complete or emit.
        self.logger.error(f"Failed to parse author data: {e}")
        return

    try:
        author_data = json.loads(response.text)
        # A null/empty name is treated the same as a missing one.
        author = author_data.get('name') or 'Unknown'
    except (ValueError, TypeError, AttributeError) as e:
        # ValueError: malformed JSON; TypeError/AttributeError: body is not
        # text or the payload is not a JSON object.
        self.logger.error(f"Failed to parse author data: {e}")
        author = 'Unknown'

    item['author'] = author
    yield item
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment