python/python爬虫/scrapy/scrapy02/scrapy02/pipelines.py

# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html


import csv
import logging

import pymongo
import pymysql
# useful for handling different item types with a single interface
from itemadapter import ItemAdapter

from scrapy02.settings import MYSQL

"""
存储数据的方案：
    1.csv
    2.mysql
    3.mongodb
    4.文件存储
"""


class Scrapy02Pipeline:
    """Item pipeline that appends each scraped item to ``shuangseqiu.csv``.

    Every item becomes one CSV row with three fields: ``item['qihao']``,
    the entries of ``item['red_ball']`` joined with ``_``, and
    ``item['blue_ball']``.
    """

    # Class-level defaults keep close_spider() safe when open_spider() never
    # ran or failed before the file handle was assigned.
    f = None
    _writer = None

    def open_spider(self, spider):
        """Open the output file in append mode and attach a CSV writer to it.

        Args:
            spider: The spider being opened (unused).
        """
        print("数据爬取开始")
        # The open() call is unchanged (no ``newline=""``) and rows end with
        # "\n", so plain values produce exactly the bytes the previous
        # hand-formatted writes produced on each platform.
        self.f = open("shuangseqiu.csv", mode="a", encoding="utf-8")
        self._writer = csv.writer(self.f, lineterminator="\n")

    def close_spider(self, spider):
        """Close the output file if it was opened.

        Args:
            spider: The spider being closed (unused).
        """
        print("数据爬取结束")
        if self.f is not None:
            self.f.close()
            self.f = None
            self._writer = None

    def process_item(self, item, spider):
        """Write one item as a CSV row and pass the item on unchanged.

        Args:
            item: Mapping with ``qihao``, ``red_ball`` (an iterable of
                strings) and ``blue_ball`` keys.
            spider: The spider that produced the item (unused).

        Returns:
            The same ``item`` object.

        Raises:
            KeyError: If one of the three expected keys is missing.
        """
        # csv.writer quotes fields that contain commas, quotes or newlines,
        # which plain string formatting would have written as a broken row.
        # str() keeps the textual form the f-string used for plain values.
        self._writer.writerow([
            str(item['qihao']),
            '_'.join(item['red_ball']),
            str(item['blue_ball']),
        ])
        return item


class Scrapy02MySQLPipeline:
    """Item pipeline that inserts each scraped item into the ``caipiao`` table.

    Connection parameters are read from the ``MYSQL`` settings mapping
    (``host``, ``port``, ``user``, ``password``, ``database``).
    """

    # Class-level default keeps close_spider() safe when open_spider() never
    # ran or failed before the connection was assigned.
    conn = None

    def open_spider(self, spider):
        """Open the MySQL connection used for the whole crawl.

        Args:
            spider: The spider being opened (unused).
        """
        print("数据爬取开始")
        self.conn = pymysql.connect(
            host=MYSQL["host"],
            port=MYSQL["port"],
            user=MYSQL["user"],
            password=MYSQL["password"],
            database=MYSQL["database"]
        )

    def close_spider(self, spider):
        """Close the MySQL connection if it was opened.

        Args:
            spider: The spider being closed (unused).
        """
        print("数据爬取结束")
        if self.conn is not None:
            self.conn.close()
            self.conn = None

    def process_item(self, item, spider):
        """Insert one item as a row and pass the item on unchanged.

        The insert is committed on success. On any ``Exception`` the failure
        is logged and the transaction is rolled back; the error is not
        re-raised, so later pipelines still receive the item.

        Args:
            item: Mapping with ``qihao``, ``red_ball`` (an iterable of
                strings) and ``blue_ball`` keys.
            spider: The spider that produced the item (unused).

        Returns:
            The same ``item`` object.
        """
        # Bound before the try block so the finally clause can always test it,
        # even when self.conn.cursor() itself raises.
        cursor = None
        try:
            cursor = self.conn.cursor()
            # Parameterized query: the driver escapes the values.
            sql = "insert into caipiao (qihao, red_ball, blue_ball) values (%s,%s,%s)"
            cursor.execute(sql, (item['qihao'], '_'.join(item['red_ball']), item['blue_ball']))
            self.conn.commit()
        except Exception:
            # Best-effort insert: record why it failed instead of hiding it.
            logging.getLogger(__name__).exception(
                "Failed to insert item into caipiao; rolling back: %r", item
            )
            self.conn.rollback()
        finally:
            if cursor is not None:
                cursor.close()
        return item


class Scrapy02MongoDbPipeline:
    """Item pipeline that stores each scraped item as a MongoDB document.

    Documents go into the ``caipiao`` collection of the ``python`` database
    on the MongoDB server at ``localhost:27017``.
    """

    def open_spider(self, spider):
        """Connect to MongoDB and select the target collection.

        Args:
            spider: The spider being opened (unused).
        """
        print("数据爬取开始")
        self.client = pymongo.MongoClient(host="localhost", port=27017)
        # NOTE: no authentication step is performed on this connection.
        # Select database "python", then its "caipiao" collection.
        self.collection = self.client["python"]["caipiao"]

    def close_spider(self, spider):
        """Close the MongoDB client.

        Args:
            spider: The spider being closed (unused).
        """
        print("数据爬取结束")
        self.client.close()

    def process_item(self, item, spider):
        """Insert one item as a document and pass the item on unchanged.

        Args:
            item: Mapping with ``qihao``, ``red_ball`` and ``blue_ball`` keys.
            spider: The spider that produced the item (unused).

        Returns:
            The same ``item`` object.
        """
        # Copy only the three stored fields into a fresh dict for insertion.
        stored_fields = ("qihao", "red_ball", "blue_ball")
        record = {name: item[name] for name in stored_fields}
        self.collection.insert_one(record)
        return item