pipelines.py 1.85 KB
Newer Older
Renán Sosa Guillen's avatar
Renán Sosa Guillen committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
# -*- coding: utf-8 -*-

# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html

import json
from collections import OrderedDict


class JsonWriterPipeline(object):
    """Item pipeline that dumps every processed item into one JSON array file.

    The output path comes from the ``filename`` setting (see ``from_crawler``).
    ``open_spider`` writes the opening ``[``, each ``process_item`` call
    appends one JSON object, and ``close_spider`` writes the closing ``]``.
    """

    # Item fields copied to the output, in the order they are serialized.
    FIELDS = ("date", "topic", "title", "author", "location", "text", "url")

    def __init__(self, filename):
        """Remember the output path; the file itself is opened in open_spider."""
        self.filename = filename
        # Defined up front so close_spider() is safe even if open_spider()
        # never ran or failed before the file was opened.
        self.counter = 0
        self.file = None

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline from the ``filename`` entry of the crawler settings.

        The value is whatever was supplied for the "filename" setting (for
        example on the command line); it is ``None`` when the setting is absent.
        """
        return cls(crawler.settings.get('filename'))

    def open_spider(self, spider):
        """Open the output file and start the JSON array."""
        self.counter = 0
        self.file = open(self.filename, 'w')
        self.file.write("[")

    def close_spider(self, spider):
        """Terminate the JSON array and close the output file.

        Does nothing when no file is open.
        """
        if self.file is None:
            return
        try:
            self.file.write("]")
        finally:
            # Release the handle even if the final write fails.
            self.file.close()
            self.file = None

    def process_item(self, item, spider):
        """Append ``item`` to the output file as one JSON object.

        Only the fields listed in ``FIELDS`` are written, in that order;
        fields the item does not contain are skipped. Returns ``item``
        unchanged so later pipelines can keep processing it.

        Raises whatever ``json.dumps`` raises for non-serializable values;
        in that case nothing is written and the counter is not advanced.
        """
        row = []
        for field in self.FIELDS:
            try:
                row.append((field, item[field]))
            except KeyError:
                # Field not populated for this item: leave it out.
                continue

        # Serialize before touching the file or the counter so a failing item
        # cannot leave a dangling separator in the output.
        record = json.dumps(OrderedDict(row))
        if self.counter:
            record = ",\n" + record

        self.file.write(record)
        self.counter += 1

        return item