refactor: 重构项目配置管理,统一使用.env配置
- 新增config.py统一读取.env配置,移除硬编码路径和参数
- 重构collect_jrxml.py支持命令行参数和环境变量配置源目录
- 新增.env.example示例配置文件,整理所有可配置项
- 重构down_embedding_model.py、import_to_chroma.py等所有脚本使用统一配置
- 新增Windows一键部署脚本setup.bat
- 修正jrxml_banch_chunker.py的文件名拼写错误
This commit is contained in:
+11
-7
@@ -11,11 +11,12 @@ from pathlib import Path
|
||||
import numpy as np
|
||||
import chromadb
|
||||
from tqdm import tqdm
|
||||
from config import EMBEDDINGS_DIR, CHROMA_DB_PATH, CHROMA_COLLECTION_NAME
|
||||
|
||||
|
||||
def main(embeddings_dir: str = None,
|
||||
chroma_path: str = None,
|
||||
collection_name: str = "jrxml_chunks"):
|
||||
collection_name: str = None):
|
||||
"""
|
||||
从 embeddings 目录读取向量和 chunks,导入 Chroma 持久化数据库
|
||||
|
||||
@@ -27,15 +28,18 @@ def main(embeddings_dir: str = None,
|
||||
project_root = Path(__file__).resolve().parent
|
||||
|
||||
if embeddings_dir is None:
|
||||
embeddings_dir = project_root / "embeddings"
|
||||
embeddings_dir = EMBEDDINGS_DIR
|
||||
else:
|
||||
embeddings_dir = Path(embeddings_dir)
|
||||
|
||||
if chroma_path is None:
|
||||
chroma_path = project_root / "chroma_db"
|
||||
chroma_path = CHROMA_DB_PATH
|
||||
else:
|
||||
chroma_path = Path(chroma_path)
|
||||
|
||||
if collection_name is None:
|
||||
collection_name = CHROMA_COLLECTION_NAME
|
||||
|
||||
embeddings_file = embeddings_dir / "embeddings.npy"
|
||||
chunks_file = embeddings_dir / "chunks.json"
|
||||
|
||||
@@ -164,11 +168,11 @@ if __name__ == "__main__":
|
||||
import argparse
|
||||
parser = argparse.ArgumentParser(description="JRXML Chunks 导入 Chroma 工具")
|
||||
parser.add_argument("--embeddings_dir", "-e", default=None,
|
||||
help="向量文件目录 (默认: embeddings)")
|
||||
help=f"向量文件目录 (默认: {EMBEDDINGS_DIR})")
|
||||
parser.add_argument("--chroma_path", "-c", default=None,
|
||||
help="Chroma 数据库路径 (默认: chroma_db)")
|
||||
parser.add_argument("--collection_name", "-n", default="jrxml_chunks",
|
||||
help="集合名称 (默认: jrxml_chunks)")
|
||||
help=f"Chroma 数据库路径 (默认: {CHROMA_DB_PATH})")
|
||||
parser.add_argument("--collection_name", "-n", default=CHROMA_COLLECTION_NAME,
|
||||
help=f"集合名称 (默认: {CHROMA_COLLECTION_NAME})")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user