Files
python/python爬虫/scrapy/scrapy02/scrapy02/pipelines.py
T
2025-08-05 09:19:34 +08:00

91 lines
2.5 KiB
Python

# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
import csv
import logging

import pymongo
import pymysql
# useful for handling different item types with a single interface
from itemadapter import ItemAdapter

from scrapy02.settings import MYSQL
"""
存储数据的方案:
1.csv
2.mysql
3.mongodb
4.文件存储
"""
class Scrapy02Pipeline:
    """Item pipeline that appends every item as one row to ``shuangseqiu.csv``.

    Each row has three columns: ``item['qihao']``, the entries of
    ``item['red_ball']`` joined with ``_``, and ``item['blue_ball']``.
    Rows are written through ``csv.writer`` so that a value containing a
    comma, quote or newline is quoted instead of corrupting the row.
    """

    # Class-level defaults keep close_spider safe when open_spider never ran
    # (or raised before the file handle was assigned).
    f = None
    _writer = None

    def open_spider(self, spider):
        """Open the CSV file in append mode and prepare the row writer.

        Args:
            spider: The spider being opened (unused).
        """
        print("数据爬取开始")
        self.f = open("shuangseqiu.csv", mode="a", encoding="utf-8")
        # "\n" keeps the row terminator identical to the rows already in the
        # file; the csv default would switch to "\r\n".
        self._writer = csv.writer(self.f, lineterminator="\n")

    def close_spider(self, spider):
        """Close the CSV file if it was opened.

        Args:
            spider: The spider being closed (unused).
        """
        print("数据爬取结束")
        if self.f:
            self.f.close()
            self.f = None
            self._writer = None

    def process_item(self, item, spider):
        """Append one item to the CSV file and pass it on unchanged.

        Args:
            item: Mapping with ``qihao``, ``red_ball`` (iterable of strings)
                and ``blue_ball`` keys.
            spider: The spider that produced the item (unused).

        Returns:
            The same ``item`` object, so later pipelines can process it.

        Raises:
            KeyError: If one of the three expected keys is missing.
        """
        # str() mirrors the f-string formatting used before, so non-string
        # values (including None) are rendered exactly as they used to be.
        row = [
            str(item["qihao"]),
            "_".join(item["red_ball"]),
            str(item["blue_ball"]),
        ]
        self._writer.writerow(row)
        return item
class Scrapy02MySQLPipeline:
    """Item pipeline that inserts every item into the MySQL table ``caipiao``.

    Connection parameters are read from the ``MYSQL`` settings mapping
    (``host``, ``port``, ``user``, ``password``, ``database``).
    """

    # Class-level default keeps close_spider safe when open_spider never ran
    # (or pymysql.connect raised before the attribute was assigned).
    conn = None

    def open_spider(self, spider):
        """Open the MySQL connection used for the whole crawl.

        Args:
            spider: The spider being opened (unused).
        """
        print("数据爬取开始")
        self.conn = pymysql.connect(
            host=MYSQL["host"],
            port=MYSQL["port"],
            user=MYSQL["user"],
            password=MYSQL["password"],
            database=MYSQL["database"],
        )

    def close_spider(self, spider):
        """Close the MySQL connection if one was opened.

        Args:
            spider: The spider being closed (unused).
        """
        print("数据爬取结束")
        if self.conn is not None:
            self.conn.close()
            self.conn = None

    def process_item(self, item, spider):
        """Insert one item into ``caipiao`` and pass it on unchanged.

        The insert is best-effort: on any error the transaction is rolled
        back, the failure is logged with its traceback, and the item is still
        returned so later pipelines keep running.

        Args:
            item: Mapping with ``qihao``, ``red_ball`` (iterable of strings)
                and ``blue_ball`` keys.
            spider: The spider that produced the item (unused).

        Returns:
            The same ``item`` object.
        """
        sql = "insert into caipiao (qihao, red_ball, blue_ball) values (%s,%s,%s)"
        # Bound before the try so the finally clause never sees an unbound
        # name when conn.cursor() itself raises.
        cursor = None
        try:
            cursor = self.conn.cursor()
            params = (item["qihao"], "_".join(item["red_ball"]), item["blue_ball"])
            cursor.execute(sql, params)
            self.conn.commit()
        except Exception:
            # Exception (not a bare except) lets KeyboardInterrupt/SystemExit
            # propagate; the error is logged instead of vanishing silently.
            logging.getLogger(__name__).exception(
                "MySQL insert failed for item %r; rolling back", item
            )
            self.conn.rollback()
        finally:
            if cursor is not None:
                cursor.close()
        return item
class Scrapy02MongoDbPipeline:
    """Item pipeline that stores every item in MongoDB.

    Documents go to the ``caipiao`` collection of the ``python`` database on
    ``localhost:27017``. No credentials are passed to the client.
    """

    def open_spider(self, spider):
        """Connect to MongoDB and keep a handle on the target collection.

        Args:
            spider: The spider being opened (unused).
        """
        print("数据爬取开始")
        self.client = pymongo.MongoClient(host="localhost", port=27017)
        # Database "python", collection "caipiao".
        self.collection = self.client["python"]["caipiao"]

    def close_spider(self, spider):
        """Close the MongoDB client.

        Args:
            spider: The spider being closed (unused).
        """
        print("数据爬取结束")
        self.client.close()

    def process_item(self, item, spider):
        """Insert one item as a document and pass the item on unchanged.

        Only the ``qihao``, ``red_ball`` and ``blue_ball`` fields are copied
        into a new dict, which is what gets inserted.

        Args:
            item: Mapping providing the three fields above.
            spider: The spider that produced the item (unused).

        Returns:
            The same ``item`` object.
        """
        wanted = ("qihao", "red_ball", "blue_ball")
        record = {field: item[field] for field in wanted}
        self.collection.insert_one(record)
        return item