Arreglada cuestión de polémica

parent 9fded341
...@@ -42,13 +42,8 @@ class NoticiasSpider(scrapy.Spider): ...@@ -42,13 +42,8 @@ class NoticiasSpider(scrapy.Spider):
for post in data: for post in data:
try: try:
# Validar contenido
content = post.get('content', {}).get('rendered', '').strip() content = post.get('content', {}).get('rendered', '').strip()
if not remove_tags(content): if content:
self.logger.warning(f"Skipped post {post.get('id')}: No meaningful content.")
continue
# Obtener categoría del artículo
class_list = post.get('class_list', {}) class_list = post.get('class_list', {})
topic = None topic = None
if isinstance(class_list, dict): if isinstance(class_list, dict):
...@@ -61,24 +56,9 @@ class NoticiasSpider(scrapy.Spider): ...@@ -61,24 +56,9 @@ class NoticiasSpider(scrapy.Spider):
item['text'] = remove_tags(content) item['text'] = remove_tags(content)
item['topic'] = topic item['topic'] = topic
item['url'] = post.get('link') item['url'] = post.get('link')
print(item['title'])
# Enlace al autor
author_link = post.get('_links', {}).get('author', [{}])[0].get('href', None)
if author_link:
yield scrapy.Request(url=author_link, callback=self.parse_author, meta={'item': item})
else:
item['author'] = 'Unknown'
yield item yield item
except Exception as e: except Exception as e:
self.logger.error(f"Error processing post {post.get('id')}: {e}") self.logger.error(f"Error processing post {post.get('id')}: {e}")
continue continue
def parse_author(self, response):
    """Attach the author's name to a pending item and emit it.

    Used as the callback of the per-article author request, which hands
    over the partially filled item in ``response.meta['item']``. The
    response body is expected to be a JSON object whose ``name`` key
    holds the author's display name.

    Args:
        response: Response of the author request; ``response.text`` is
            decoded as JSON and ``response.meta['item']`` is the item
            to complete.

    Yields:
        The item with ``item['author']`` set. When the body cannot be
        decoded, is not a JSON object, or carries no usable ``name``,
        the author falls back to ``'Unknown'`` so that a broken author
        endpoint does not discard an otherwise complete article.
        Nothing is yielded when the response carries no item at all.
    """
    try:
        item = response.meta['item']
    except KeyError as e:
        # Without the item there is nothing to complete or emit.
        self.logger.error(f"Failed to parse author data: {e}")
        return

    author = 'Unknown'
    try:
        author_data = json.loads(response.text)
        # A null or empty name is treated the same as a missing one.
        author = author_data.get('name') or 'Unknown'
    except Exception as e:
        # Deliberately broad: this is best-effort enrichment, and the
        # article itself must survive any failure of the author lookup.
        self.logger.error(f"Failed to parse author data: {e}")

    item['author'] = author
    yield item
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment