383 lines
12 KiB
Python
383 lines
12 KiB
Python
|
|
import hashlib
import os
import platform
import sys
import threading
from datetime import datetime, timedelta, timezone
from io import BytesIO
from typing import List, Dict, Optional, BinaryIO, Tuple, Any

from minio import Minio
from minio.error import S3Error, MinioException

from utils.logger import log
|
|||
|
|
|
|||
|
|
|
|||
|
|
class MinIOAgent:
    """
    Process-wide helper around a MinIO client.

    Offers bucket creation, upload of in-memory bytes, download to a local
    file, presigned URL generation, listing and deletion. Methods return
    plain dictionaries of metadata so callers can persist them elsewhere
    (the original notes mention associating them with MySQL rows).

    The class is a singleton: every construction returns the same object and
    only the first successful construction configures the client.

    NOTE: the ``timeout`` config value is stored in ``self.config`` but is
    not passed to the MinIO client anywhere in this class.
    """

    _instance = None  # the single shared instance
    _lock = threading.Lock()  # guards creation of the shared instance

    def __new__(cls, *args, **kwargs):
        """Return the shared instance, creating it on first use."""
        if not cls._instance:
            with cls._lock:
                # Re-check under the lock: another thread may have created
                # the instance while this one was waiting.
                if not cls._instance:
                    cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(self, config: dict):
        """
        Configure the MinIO connection (first successful call only).

        Args:
            config: MinIO settings with the keys:
                - endpoint: service endpoint, e.g. 'localhost:9000'
                - access_key: access key
                - secret_key: secret key
                - secure (optional): use TLS, default False
                - region (optional): region name
                - timeout (optional): timeout in seconds, default 30

        Raises:
            ValueError: if a required key is missing from ``config``.
            Exception: whatever client creation or the connection check
                raises is logged and propagated.
        """
        # The shared instance is already configured: nothing to do.
        if getattr(self, '_client', None):
            return

        # Validate required settings.
        required_keys = ['endpoint', 'access_key', 'secret_key']
        if not all(key in config for key in required_keys):
            raise ValueError(f"MinIO配置缺少必要参数,需要: {required_keys}")

        # Merge the supplied settings with defaults.
        self.config = {
            'endpoint': config['endpoint'],
            'access_key': config['access_key'],
            'secret_key': config['secret_key'],
            'secure': config.get('secure', False),
            'region': config.get('region'),
            'timeout': config.get('timeout', 30),
        }

        # Bind the current platform name into the logger context.
        current_platform = platform.system()
        self.log = log.bind(module=f"MinIOAgent({current_platform})")

        self._client = self._create_client()

        try:
            self._verify_connection()
        except Exception:
            # Drop the unverified client; otherwise the "already initialised"
            # guard above would make every later construction silently
            # return an instance whose connection check failed.
            self._client = None
            raise

    def _create_client(self) -> "Minio":
        """Build the MinIO client from ``self.config``; log and re-raise on failure."""
        try:
            client = Minio(
                endpoint=self.config['endpoint'],
                access_key=self.config['access_key'],
                secret_key=self.config['secret_key'],
                secure=self.config['secure'],
                region=self.config['region'],
            )
            self.log.info("MinIO客户端创建成功")
            return client
        except Exception as e:
            self.log.critical("创建MinIO客户端失败", 错误=str(e), exc_info=True)
            raise

    def _verify_connection(self) -> None:
        """Check the connection by listing buckets; log and re-raise on failure."""
        try:
            self._client.list_buckets()
            self.log.info(f"成功连接到MinIO服务:{self.config['endpoint']}")
        except Exception as e:
            self.log.critical("连接验证失败", 错误=str(e), exc_info=True)
            raise

    def create_bucket(self, bucket_name: str) -> bool:
        """
        Create a bucket unless it already exists.

        Args:
            bucket_name: name of the bucket.

        Returns:
            True if the bucket exists after the call (created or already
            present), False if a MinioException occurred (it is logged).
        """
        try:
            if not self._client.bucket_exists(bucket_name):
                self._client.make_bucket(bucket_name)
                self.log.info(f"存储桶创建成功:{bucket_name}")
                return True
            self.log.debug(f"存储桶已存在:{bucket_name}")
            return True
        except MinioException as e:
            self.log.error(f"创建存储桶 {bucket_name} 失败", 错误=str(e), exc_info=True)
            return False

    def upload_bytes(self, bucket: str, object_name: str, data: bytes) -> Dict[str, Any]:
        """
        Upload in-memory bytes as an object.

        Args:
            bucket: bucket name (created if missing).
            object_name: object name (path) inside the bucket.
            data: payload; must be non-empty.

        Returns:
            Metadata dictionary with the keys:
                - bucket: bucket name
                - object_name: object path
                - size: payload size in bytes
                - etag: ETag reported by the server
                - content_type: content type sent with the upload
                - upload_time: naive datetime in UTC
                - local_hash: MD5 hex digest computed locally

        Raises:
            ValueError: if ``data`` is empty.
            MinioException: if the upload fails (logged, then re-raised).
        """
        if not data:
            raise ValueError("上传数据不能为空")

        # Make sure the target bucket exists.
        self.create_bucket(bucket)

        try:
            # Local digest, for integrity checks by the caller.
            local_hash = hashlib.md5(data).hexdigest()
            content_type = self._guess_content_type(object_name)

            result = self._client.put_object(
                bucket_name=bucket,
                object_name=object_name,
                data=BytesIO(data),
                length=len(data),
                content_type=content_type,
            )

            # The write result is not guaranteed to carry a content type or
            # a modification time, so fall back to the value that was sent
            # and to the current time instead of failing after the upload
            # has already succeeded.
            last_modified = getattr(result, 'last_modified', None)
            if last_modified is not None:
                upload_time = datetime.fromtimestamp(
                    last_modified.timestamp(), tz=timezone.utc
                ).replace(tzinfo=None)
            else:
                upload_time = datetime.now(timezone.utc).replace(tzinfo=None)

            metadata = {
                'bucket': bucket,
                'object_name': object_name,
                'size': len(data),
                'etag': result.etag,
                'content_type': getattr(result, 'content_type', None) or content_type,
                'upload_time': upload_time,
                'local_hash': local_hash,
            }

            self.log.info(
                "文件上传成功",
                存储桶=bucket,
                对象名称=object_name,
                大小=len(data)
            )
            return metadata

        except MinioException as e:
            self.log.error(
                "文件上传失败",
                存储桶=bucket,
                对象名称=object_name,
                错误=str(e),
                exc_info=True
            )
            raise

    def download_file(self, bucket: str, object_name: str, local_path: str) -> Dict[str, Any]:
        """
        Download an object to a local file.

        Args:
            bucket: bucket name.
            object_name: object name (path) inside the bucket.
            local_path: destination path; missing parent directories are
                created.

        Returns:
            Dictionary with the keys:
                - local_path: destination path as given
                - size: size of the downloaded file in bytes
                - download_time: elapsed download time in seconds
                - downloaded_at: local datetime when the download finished

        Raises:
            MinioException: if the download fails (logged, then re-raised).
            IOError: if a local file operation fails (logged, then re-raised).
        """
        try:
            # A bare file name has an empty dirname, and os.makedirs('')
            # raises, so only create the parent when there is one.
            parent_dir = os.path.dirname(local_path)
            if parent_dir:
                os.makedirs(parent_dir, exist_ok=True)

            start_time = datetime.now()
            self._client.fget_object(bucket, object_name, local_path)
            download_time = datetime.now() - start_time

            stat = os.stat(local_path)

            result = {
                'local_path': local_path,
                'size': stat.st_size,
                'download_time': download_time.total_seconds(),
                'downloaded_at': datetime.now(),
            }

            self.log.info(
                "文件下载成功",
                存储桶=bucket,
                对象名称=object_name,
                本地路径=local_path,
                大小=stat.st_size
            )
            return result

        except MinioException as e:
            self.log.error(
                "文件下载失败",
                存储桶=bucket,
                对象名称=object_name,
                错误=str(e),
                exc_info=True
            )
            raise
        except IOError as e:
            self.log.error(
                "本地文件操作失败",
                本地路径=local_path,
                错误=str(e),
                exc_info=True
            )
            raise

    def get_presigned_url(self, bucket: str, object_name: str, expires: int = 3600) -> Dict[str, Any]:
        """
        Generate a temporary GET URL for an object.

        Args:
            bucket: bucket name.
            object_name: object name (path) inside the bucket.
            expires: lifetime of the URL in seconds, default 3600.

        Returns:
            Dictionary with the keys ``presigned_url``, ``expires_in``
            (seconds), ``expires_at`` (local datetime), ``bucket`` and
            ``object_name``.

        Raises:
            MinioException: if URL generation fails (logged, then re-raised).
        """
        try:
            # The client expects the lifetime as a timedelta, not as a plain
            # number of seconds.
            url = self._client.presigned_get_object(
                bucket_name=bucket,
                object_name=object_name,
                expires=timedelta(seconds=expires),
            )

            result = {
                'presigned_url': url,
                'expires_in': expires,
                'expires_at': datetime.now() + timedelta(seconds=expires),
                'bucket': bucket,
                'object_name': object_name,
            }

            self.log.debug(
                "预签名URL生成成功",
                存储桶=bucket,
                对象名称=object_name,
                过期时间=expires
            )
            return result

        except MinioException as e:
            self.log.error(
                "生成预签名URL失败",
                存储桶=bucket,
                对象名称=object_name,
                错误=str(e),
                exc_info=True
            )
            raise

    def list_objects(self, bucket: str, prefix: str = "") -> List[Dict[str, Any]]:
        """
        List objects under a prefix (recursively) with their metadata.

        Args:
            bucket: bucket name.
            prefix: object name prefix; empty string lists everything.

        Returns:
            One dictionary per object with the keys ``bucket``,
            ``object_name``, ``size``, ``last_modified``, ``etag`` and
            ``content_type``.

        Raises:
            MinioException: if listing or a stat call fails (logged, then
                re-raised).
        """
        try:
            objects = self._client.list_objects(
                bucket_name=bucket,
                prefix=prefix,
                recursive=True
            )

            result = []
            for obj in objects:
                # One extra stat request per object supplies the etag and
                # content type reported below.
                stat = self._client.stat_object(bucket, obj.object_name)

                result.append({
                    'bucket': bucket,
                    'object_name': obj.object_name,
                    'size': obj.size,
                    'last_modified': obj.last_modified,
                    'etag': stat.etag,
                    'content_type': stat.content_type,
                })

            self.log.info(
                "对象列表查询成功",
                存储桶=bucket,
                前缀=prefix,
                数量=len(result)
            )
            return result

        except MinioException as e:
            self.log.error(
                "查询对象列表失败",
                存储桶=bucket,
                前缀=prefix,
                错误=str(e),
                exc_info=True
            )
            raise

    def delete_object(self, bucket: str, object_name: str) -> bool:
        """
        Delete a single object.

        Args:
            bucket: bucket name.
            object_name: object name (path) inside the bucket.

        Returns:
            True on success, False if a MinioException occurred (it is
            logged, not raised).
        """
        try:
            self._client.remove_object(bucket, object_name)
            self.log.info(
                "对象删除成功",
                存储桶=bucket,
                对象名称=object_name
            )
            return True
        except MinioException as e:
            self.log.error(
                "删除对象失败",
                存储桶=bucket,
                对象名称=object_name,
                错误=str(e),
                exc_info=True
            )
            return False

    @staticmethod
    def _guess_content_type(object_name: str) -> str:
        """Guess the MIME type from the object name; default to a binary stream."""
        from mimetypes import guess_type

        mime_type, _ = guess_type(object_name)
        return mime_type or 'application/octet-stream'
|