91 lines
2.7 KiB
Python
91 lines
2.7 KiB
Python
|
|
"""测试多格式文件解析器:XLSX, XLS, DOC。"""
|
||
|
|
|
||
|
|
import tempfile
|
||
|
|
from pathlib import Path
|
||
|
|
|
||
|
|
import pytest
|
||
|
|
|
||
|
|
|
||
|
|
def _make_xlsx(path: str) -> None:
    """Write a minimal .xlsx fixture workbook to ``path``.

    The workbook has a single sheet titled "Sheet1" holding a header row
    followed by two data rows (a text label and an integer amount).
    """
    from openpyxl import Workbook

    rows = (
        ("名称", "金额"),
        ("项目A", 100),
        ("项目B", 200),
    )

    workbook = Workbook()
    sheet = workbook.active
    sheet.title = "Sheet1"
    # openpyxl addresses cells with 1-based row/column numbers, so row 1,
    # column 1 is the same cell as "A1".
    for row_no, row in enumerate(rows, start=1):
        for col_no, value in enumerate(row, start=1):
            sheet.cell(row=row_no, column=col_no, value=value)
    workbook.save(path)
|
||
|
|
|
||
|
|
|
||
|
|
def _make_xls(path: str) -> None:
    """Write a minimal legacy .xls fixture workbook to ``path``.

    The workbook has a single sheet named "Sheet1" holding a header row
    followed by two data rows (a text label and an integer amount).
    """
    from xlwt import Workbook

    rows = (
        ("名称", "金额"),
        ("项目A", 100),
        ("项目B", 200),
    )

    book = Workbook()
    sheet = book.add_sheet("Sheet1")
    # write() is called with (row, column, value) using 0-based indices,
    # in row-major order.
    for row_no, row in enumerate(rows):
        for col_no, value in enumerate(row):
            sheet.write(row_no, col_no, value)
    book.save(path)
|
||
|
|
|
||
|
|
|
||
|
|
class TestMultiFormatParsers:
    """Tests for the multi-format parsers behind ``file_parser.parse_file``."""

    def test_parse_xlsx(self):
        """A generated .xlsx workbook is parsed and its content shows in the text."""
        from backend.file_parser import parse_file

        # Only reserve a unique path here; the handle is closed on leaving the
        # ``with`` block so the workbook writer can reopen the file by name.
        with tempfile.NamedTemporaryFile(suffix=".xlsx", delete=False) as tmp:
            path = tmp.name
        try:
            _make_xlsx(path)
            result = parse_file(path, ".xlsx")
            assert result["file_type"] == "xlsx"
            assert result["method"] == "openpyxl"
            assert result["error"] is None
            assert "Sheet1" in result["text"]
            assert "项目A" in result["text"]
            assert "100" in result["text"]
        finally:
            # delete=False above means cleanup is this test's responsibility.
            Path(path).unlink(missing_ok=True)

    def test_parse_xls(self):
        """A generated .xls workbook is parsed and its content shows in the text."""
        from backend.file_parser import parse_file

        # Only reserve a unique path here; the handle is closed on leaving the
        # ``with`` block so the workbook writer can reopen the file by name.
        with tempfile.NamedTemporaryFile(suffix=".xls", delete=False) as tmp:
            path = tmp.name
        try:
            _make_xls(path)
            result = parse_file(path, ".xls")
            assert result["file_type"] == "xls"
            assert result["method"] == "xlrd"
            assert result["error"] is None
            assert "Sheet1" in result["text"]
            assert "项目A" in result["text"]
            # The amount was written as the integer 100; the parsed text is
            # expected to render it in float form.
            assert "100.0" in result["text"]
        finally:
            # delete=False above means cleanup is this test's responsibility.
            Path(path).unlink(missing_ok=True)

    def test_parse_doc_nonexistent(self):
        """A missing .doc file yields an error result instead of parsed text."""
        from backend.file_parser import parse_file

        result = parse_file("/nonexistent/file.doc", ".doc")
        # On this failure path the result carries the dotted extension and
        # the placeholder method "none".
        assert result["file_type"] == ".doc"
        assert result["method"] == "none"
        assert result.get("error") is not None

    def test_dispatch_adds_new_formats(self):
        """Each new extension is handled by ``parse_file`` and reported as itself."""
        from backend.file_parser import parse_file

        # Probe paths live in a fresh temporary directory so they are known
        # not to exist and the test does not rely on a POSIX-style /tmp.
        with tempfile.TemporaryDirectory() as tmp_dir:
            for ext in [".xlsx", ".xls", ".doc"]:
                probe = str(Path(tmp_dir) / ("test" + ext))
                result = parse_file(probe, ext)
                # Accept the dotted or the bare form, but only for the
                # extension that was requested, so a result labelled with a
                # different format is not silently accepted.
                assert result["file_type"] in (ext, ext.lstrip("."))
|