Files
python/python爬虫/scrapy/meinv/meinv/pipelines.py
T
2025-08-05 09:19:34 +08:00

61 lines
1.9 KiB
Python

# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
# useful for handling different item types with a single interface
import logging

import pymysql
import scrapy
from itemadapter import ItemAdapter
from scrapy.exceptions import DropItem
from scrapy.pipelines.images import ImagesPipeline

from meinv.settings import MYSQL
class MeinvPipeline:
    """Item pipeline that writes each item's name and local image path to MySQL.

    One connection is opened when the spider starts and closed when it
    finishes. Every item is inserted into the ``tu`` table and committed
    individually.
    """

    def open_spider(self, spider):
        """Open the MySQL connection described by the ``MYSQL`` settings mapping."""
        print("数据爬取开始")
        self.conn = pymysql.connect(
            host=MYSQL["host"],
            port=MYSQL["port"],
            user=MYSQL["user"],
            password=MYSQL["password"],
            database=MYSQL["database"],
        )

    def close_spider(self, spider):
        """Close the MySQL connection opened in ``open_spider``."""
        print("数据爬取结束")
        self.conn.close()

    def process_item(self, item, spider):
        """Insert ``item["name"]`` and ``item["local_path"]`` into table ``tu``.

        The write is best-effort: on any failure the transaction is rolled
        back, the error is logged with its traceback, and the item is still
        returned.

        Returns:
            The unchanged ``item``.
        """
        # Bind the name before the try block so the finally clause can test it
        # safely even when creating the cursor itself fails.
        cursor = None
        try:
            cursor = self.conn.cursor()
            sql = "insert into tu (name, img) values (%s,%s)"
            cursor.execute(sql, (item["name"], item["local_path"]))
            self.conn.commit()
            print("写入成功")
        except Exception:
            # Catch Exception rather than using a bare except so that
            # KeyboardInterrupt / SystemExit still propagate.
            self.conn.rollback()
            logging.getLogger(__name__).exception(
                "Failed to write item to MySQL; transaction rolled back"
            )
        finally:
            if cursor is not None:
                cursor.close()
        return item
# To use the images pipeline, the storage location IMAGES_STORE must be set in settings.
class MeinvSavePipeline(ImagesPipeline):
    """Image pipeline that downloads ``item["img"]`` and records where it was saved.

    The storage location ``IMAGES_STORE`` must be configured in the project
    settings for the images pipeline to work.
    """

    def get_media_requests(self, item, info):
        """Return the download request for the item's image URL."""
        return scrapy.Request(item["img"])

    def file_path(self, request, response=None, info=None, *, item=None):
        """Return the path to store the image at: ``img/<last URL segment>``."""
        file_name = request.url.split("/")[-1]
        return f"img/{file_name}"

    def item_completed(self, results, item, info):
        """Store the downloaded file's path on the item as ``local_path``.

        ``results`` is iterated as ``(ok, file_info)`` pairs; only pairs whose
        ``ok`` flag is true are read for a ``"path"`` entry.

        Raises:
            DropItem: if no download succeeded, so later pipelines never see
                an item without ``local_path``.
        """
        # Only successful entries are subscripted; the original read
        # results[0] unconditionally and crashed on failed or empty results.
        paths = [file_info["path"] for ok, file_info in results if ok]
        if not paths:
            raise DropItem("Item contains no downloaded image")
        item["local_path"] = paths[0]
        print(results)
        return item