91 lines
2.5 KiB
Python
91 lines
2.5 KiB
Python
# Define your item pipelines here
|
|
#
|
|
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
|
|
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
|
|
|
|
|
|
# useful for handling different item types with a single interface
|
|
import csv
import logging

import pymongo
import pymysql
from itemadapter import ItemAdapter

from scrapy02.settings import MYSQL
|
|
|
|
"""
|
|
存储数据的方案:
|
|
1.csv
|
|
2.mysql
|
|
3.mongodb
|
|
4.文件存储
|
|
"""
|
|
|
|
|
|
class Scrapy02Pipeline:
    """Item pipeline that appends each item as one row to ``shuangseqiu.csv``.

    A row has three columns: ``item['qihao']``, the entries of
    ``item['red_ball']`` joined with ``_``, and ``item['blue_ball']``.
    """

    # File handle and CSV writer; both remain None until open_spider succeeds,
    # which lets close_spider run safely even if the file was never opened.
    f = None
    _writer = None

    def open_spider(self, spider):
        """Open the CSV file in append mode and prepare the row writer.

        Args:
            spider: The spider being opened (unused).
        """
        print("数据爬取开始")
        self.f = open("shuangseqiu.csv", mode="a", encoding="utf-8")
        # csv.writer quotes fields containing commas, quotes or newlines, so a
        # single item can never spill over into extra columns or rows.
        # lineterminator="\n" keeps the row ending the same as the previous
        # hand-formatted output.
        self._writer = csv.writer(self.f, lineterminator="\n")

    def close_spider(self, spider):
        """Close the CSV file if it was opened.

        Args:
            spider: The spider being closed (unused).
        """
        print("数据爬取结束")
        if self.f is not None:
            self.f.close()
            self.f = None
            self._writer = None

    def process_item(self, item, spider):
        """Write one item to the CSV file and pass it on unchanged.

        Args:
            item: Mapping with ``qihao``, ``red_ball`` (an iterable of
                strings) and ``blue_ball`` keys.
            spider: The spider that produced the item (unused).

        Returns:
            The same ``item`` object, so later pipelines can process it.
        """
        self._writer.writerow(
            (item["qihao"], "_".join(item["red_ball"]), item["blue_ball"])
        )
        return item
|
|
|
|
|
|
class Scrapy02MySQLPipeline:
    """Item pipeline that inserts each item into the MySQL ``caipiao`` table.

    Connection parameters are read from the ``MYSQL`` settings mapping
    (``host``, ``port``, ``user``, ``password``, ``database``).
    """

    # Active connection; remains None until open_spider succeeds so that
    # close_spider is safe to call even if connecting failed.
    conn = None

    def open_spider(self, spider):
        """Connect to MySQL when the spider starts.

        Args:
            spider: The spider being opened (unused).
        """
        print("数据爬取开始")
        self.conn = pymysql.connect(
            host=MYSQL["host"],
            port=MYSQL["port"],
            user=MYSQL["user"],
            password=MYSQL["password"],
            database=MYSQL["database"],
        )

    def close_spider(self, spider):
        """Close the MySQL connection if one was established.

        Args:
            spider: The spider being closed (unused).
        """
        print("数据爬取结束")
        if self.conn is not None:
            self.conn.close()
            self.conn = None

    def process_item(self, item, spider):
        """Insert one item as a row; on failure roll back and keep going.

        Storage is best-effort: any error during the insert is logged with
        its traceback, the transaction is rolled back, and the item is still
        returned so later pipelines continue to run.

        Args:
            item: Mapping with ``qihao``, ``red_ball`` (an iterable of
                strings) and ``blue_ball`` keys.
            spider: The spider that produced the item (unused).

        Returns:
            The same ``item`` object.
        """
        sql = "insert into caipiao (qihao, red_ball, blue_ball) values (%s,%s,%s)"
        # Bound before the try block so the finally clause can always test it,
        # even when creating the cursor itself is what failed.
        cursor = None
        try:
            cursor = self.conn.cursor()
            cursor.execute(
                sql,
                (item["qihao"], "_".join(item["red_ball"]), item["blue_ball"]),
            )
            self.conn.commit()
        except Exception:
            # Exception (not a bare except) lets KeyboardInterrupt/SystemExit
            # propagate; the failure is logged instead of silently discarded.
            logging.getLogger(__name__).exception(
                "Failed to insert item into MySQL; rolling back"
            )
            self.conn.rollback()
        finally:
            if cursor is not None:
                cursor.close()
        return item
|
|
|
|
|
|
class Scrapy02MongoDbPipeline:
    """Item pipeline that stores each item as a document in MongoDB.

    The connection target is defined by the class attributes below, so a
    subclass can point the pipeline at another server, database or
    collection without overriding any method.
    """

    # Connection target; the defaults match the previously hard-coded values.
    mongo_host = "localhost"
    mongo_port = 27017
    mongo_db = "python"
    mongo_collection = "caipiao"

    # Client and collection handles; both remain None until open_spider
    # succeeds so that close_spider is safe to call at any time.
    client = None
    collection = None

    def open_spider(self, spider):
        """Connect to MongoDB and select the target collection.

        Args:
            spider: The spider being opened (unused).
        """
        print("数据爬取开始")
        # NOTE(review): if the server requires authentication, pass the
        # credentials to MongoClient (e.g. username=/password=) and load them
        # from settings rather than hard-coding them here.
        self.client = pymongo.MongoClient(host=self.mongo_host, port=self.mongo_port)
        self.collection = self.client[self.mongo_db][self.mongo_collection]

    def close_spider(self, spider):
        """Close the MongoDB client if one was created.

        Args:
            spider: The spider being closed (unused).
        """
        print("数据爬取结束")
        if self.client is not None:
            self.client.close()
            self.client = None
            self.collection = None

    def process_item(self, item, spider):
        """Insert one item as a document and pass it on unchanged.

        A fresh dict is built for the insert, so the ``item`` object itself is
        never the one handed to ``insert_one``.

        Args:
            item: Mapping with ``qihao``, ``red_ball`` and ``blue_ball`` keys.
            spider: The spider that produced the item (unused).

        Returns:
            The same ``item`` object.
        """
        document = {key: item[key] for key in ("qihao", "red_ball", "blue_ball")}
        self.collection.insert_one(document)
        return item
|