61 lines
1.9 KiB
Python
61 lines
1.9 KiB
Python
# Define your item pipelines here
|
|
#
|
|
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
|
|
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
|
|
|
|
|
|
# useful for handling different item types with a single interface
|
|
import logging

import pymysql
import scrapy
from itemadapter import ItemAdapter
from scrapy.pipelines.images import ImagesPipeline

from meinv.settings import MYSQL
|
|
|
|
|
|
class MeinvPipeline:
    """Item pipeline that writes each item's name and image path to MySQL.

    One connection is opened when the spider starts and closed when it
    finishes. Every item is inserted into the ``tu`` table (columns ``name``
    and ``img``) and committed individually.
    """

    def open_spider(self, spider):
        """Open the MySQL connection using the ``MYSQL`` settings mapping."""
        print("数据爬取开始")
        self.conn = pymysql.connect(
            host=MYSQL["host"],
            port=MYSQL["port"],
            user=MYSQL["user"],
            password=MYSQL["password"],
            database=MYSQL["database"],
        )

    def close_spider(self, spider):
        """Close the MySQL connection opened in ``open_spider``."""
        print("数据爬取结束")
        self.conn.close()

    def process_item(self, item, spider):
        """Insert ``item['name']`` and ``item['local_path']`` into ``tu``.

        The write is best-effort: on any ordinary error the transaction is
        rolled back, the error is logged with its traceback, and the item is
        still returned so that later pipelines keep running.

        Args:
            item: Mapping-like item providing ``name`` and ``local_path``
                (``local_path`` is presumably filled in by an earlier
                pipeline -- verify the ITEM_PIPELINES order).
            spider: The running spider (unused).

        Returns:
            The same ``item`` object, unchanged.
        """
        # Bind the name up front so the ``finally`` clause is safe even when
        # ``self.conn.cursor()`` itself raises.
        cursor = None
        try:
            cursor = self.conn.cursor()
            # Parameterized query: the driver escapes the values.
            sql = "insert into tu (name, img) values (%s,%s)"
            cursor.execute(sql, (item['name'], item["local_path"]))
            self.conn.commit()
            print("写入成功")
        except Exception:
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt / SystemExit are not swallowed.
            logging.getLogger(__name__).exception(
                "Failed to write item to MySQL; rolling back"
            )
            self.conn.rollback()
        finally:
            if cursor is not None:
                cursor.close()
        return item
|
|
|
|
|
|
# To use the images pipeline, the storage location IMAGES_STORE must be set in settings
|
|
class MeinvSavePipeline(ImagesPipeline):
    """Images pipeline that downloads ``item["img"]`` and records its saved path.

    The stored path (relative, of the form ``img/<file name>``) is written
    back to the item as ``item["local_path"]``.
    """

    def get_media_requests(self, item, info):
        """Return the download request for the item's image URL."""
        return scrapy.Request(item["img"])

    def file_path(self, request, response=None, info=None, *, item=None):
        """Return the storage path ``img/<last URL segment>`` for a request."""
        # The file name is whatever follows the final "/" of the URL.
        file_name = request.url.split("/")[-1]
        return f"img/{file_name}"

    def item_completed(self, results, item, info):
        """Store the downloaded file's path on the item.

        Args:
            results: Sequence of ``(success, info_or_failure)`` pairs, one per
                requested image; for successful downloads the second element
                is a dict that includes ``path``.
            item: The item being processed; gains a ``local_path`` key.
            info: Media pipeline bookkeeping object (unused).

        Returns:
            The item with ``local_path`` set to the first successfully
            downloaded file's path.

        Raises:
            DropItem: If no image was downloaded successfully.
        """
        # Local import: keeps this class usable without touching the
        # file-level import block.
        from scrapy.exceptions import DropItem

        print(results)
        # Only successful entries carry a dict; failed ones hold a failure
        # object that cannot be indexed by "path".
        downloaded = [detail["path"] for ok, detail in results if ok]
        if not downloaded:
            raise DropItem("Item contains no downloaded image")
        item["local_path"] = downloaded[0]
        return item
|